STATPACK examples

ex1_apply_q_bd.F90

program ex1_apply_q_bd
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines APPLY_Q_BD and APPLY_P_BD
!   in module SVD_Procedures for applying Householder transformations stored during the
!   bidiagonal reduction of a real matrix to other matrices.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP, BD_SVD and BD_INVITER
!    in module SVD_Procedures for computing a partial or full SVD of a real matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, bd_inviter,     &
                         bd_cmp, bd_svd, apply_q_bd, apply_p_bd, norm, c50
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=500, mn=min(m,n), nsing=20, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of apply_q_bd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                     :: err, eps, elapsed_time
    real(stnd), dimension(n,m)     :: a, a2
    real(stnd), dimension(n,nsing) :: leftvec
    real(stnd), dimension(m,nsing) :: rightvec
    real(stnd), dimension(mn)      :: s, d, e, e2, tauq, taup
!
    integer      :: istart, iend, irate, imax, itime
!
    logical(lgl) :: failure1, failure2, bd_is_upper
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BIDIAGONAL REDUCTION AND PARTIAL SVD OF A n-by-m REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
!
!   GENERATE A RANDOM DATA MATRIX a.
!
    call random_number( a )
!
!   SAVE RANDOM DATA MATRIX a .
!
    a2(:n,:m) = a(:n,:m)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   REDUCE a TO LOWER OR UPPER BIDIAGONAL FORM.
!
    call bd_cmp( a, s, e2, tauq, taup )
!
!   THE DIAGONAL ELEMENTS ARE STORED IN s .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e2 .
!
!   SAVE BIDIAGONAL FORM OF a .
!
    e(:mn) = e2(:mn)
    d(:mn) = s(:mn)
!
    bd_is_upper = n>=m
!
!   COMPUTE SINGULAR VALUES OF BIDIAGONAL FORM OF a .
!
    call bd_svd( bd_is_upper, s, e2, failure1, sort=sort  )
!
!   COMPUTE THE FIRST nsing SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION.
!
    call bd_inviter( bd_is_upper, d, e, s(:nsing), leftvec(:mn,:nsing), rightvec(:mn,:nsing),  &
                     failure2, maxiter=maxiter )
!
!   COMPUTE SINGULAR VECTORS OF a BY BACK-TRANSFORMATION.
!
    if ( bd_is_upper ) then
        leftvec(mn+1_i4b:n,:nsing) = zero
    else
        rightvec(mn+1_i4b:m,:nsing) = zero
    end if
!
!   GENERATE LEFT SINGULAR VECTORS OF a IN leftvec BY BACK-TRANSFORMATION.
!
    call apply_q_bd( a, tauq, leftvec,  left=true, trans=false )
!
!   GENERATE RIGHT SINGULAR VECTORS OF a IN rightvec BY BACK-TRANSFORMATION.
!
    call apply_p_bd( a, taup, rightvec, left=true, trans=false )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*rightvec - leftvec*s(:nsing),
!   WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
    err =  norm( matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n) )/( sum( abs(s(:mn)) )*real(mn,stnd) )
!
    if ( err<=eps .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_svd() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure2
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_apply_q_bd
! =============================
!
end program ex1_apply_q_bd

ex1_bd_cmp.F90

program ex1_bd_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines BD_CMP and ORTHO_GEN_BD
!   in module SVD_Procedures for reducing to bidiagonal form a real matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, bd_cmp,         &
                         ortho_gen_bd, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, nm=min(n,m)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, tauq, taup
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, bd, p
!
    integer(i4b) :: l
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BIDIAGONAL REDUCTION OF A m-by-n REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), p(n,nm), d(nm), e(nm),     &
              tauq(nm), taup(nm), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n REAL RANDOM DATA MATRIX.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), bd(nm,nm), resid(nm,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   FIRST, CALL bd_cmp TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!                      a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER OR LOWER BIDIAGONAL MATRIX.
!
    call bd_cmp( a, d, e, tauq, taup )
!
!   ON OUTPUT OF bd_cmp:
!
!       a, tauq AND taup CONTAINS THE ELEMENTARY REFLECTORS
!       DEFINING Q AND P IN PACKED FORM.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!   SECOND, CALL ortho_gen_bd TO GENERATE Q AND P.
!
    call ortho_gen_bd( a, tauq, taup, p )
!
!   ON OUTPUT OF ortho_gen_bd, a CONTAINS THE FIRST min(n,m) COLUMNS OF Q
!   AND p CONTAINS THE ORTHOGONAL MATRIX P.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION Q**(t)*a - BD*P**(t),
!
        bd(:nm,:nm) = zero
!
        if ( m>=n ) then
!
!           BD IS UPPER BIDIAGONAL.
!
            do l = 1_i4b, nm-1_i4b
                bd(l,l)       = d(l)
                bd(l,l+1_i4b) = e(l+1_i4b)
            end do
!
            bd(nm,nm) = d(nm)
!
        else
!
!           BD IS LOWER BIDIAGONAL.
!
            bd(1_i4b,1_i4b) = d(1_i4b)
!
            do l = 2_i4b, nm
                bd(l,l-1_i4b) = e(l)
                bd(l,l)       = d(l)
            end do
!
        endif
!
        resid(:nm,:n) = matmul( transpose(a(:m,:nm)), a2(:m,:n) )           &
                        - matmul( bd(:nm,:nm), transpose(p(:n,:nm )) )
!
        bd(:nm,1_i4b) = norm( resid(:nm,:n), dim=1_i4b )
        err1 =  maxval( bd(:nm,1_i4b) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( a2(:nm,:nm) )
!
        resid(:nm,:nm) = abs( a2(:nm,:nm) - matmul( transpose(a(:m,:nm )), a(:m,:nm ) ) )
        err2 = maxval( resid(:nm,:nm) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - P**(t)*P.
!
        resid(:nm,:nm) = abs( a2(:nm,:nm) - matmul( transpose(p(:n,:nm )), p(:n,:nm ) ) )
        err3 = maxval( resid(:nm,:nm) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, bd, resid )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, d, e, tauq, taup )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed bidiagonal decomposition a = Q*BD*P**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed Q orthogonal matrix                 = ', err2
        write (prtunit,*) 'Orthogonality of the computed P orthogonal matrix                 = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the bidiagonal reduction of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_cmp
! =========================
!
end program ex1_bd_cmp

ex1_bd_cmp2.F90

program ex1_bd_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_CMP2
!   in module SVD_Procedures for reducing to bidiagonal form a real matrix
!   using the one-sided Rhala-Barlow algorithm.
!                                                                              
!                                                                              
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, bd_cmp2,     &
                         norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE m>=n).
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e
    real(stnd), allocatable, dimension(:,:) :: a2, resid, bd, a, p
!
    integer(i4b) :: l
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, reortho, do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BIDIAGONAL REDUCTION OF A REAL m-by-n MATRIX (WITH m>=n)
!   USING THE RALHA-BARLOW ONE_SIDED ALGORITHM.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW ALGORITHM.
!
    reortho = true
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), p(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX .
!
    call random_number( a )
!
    if ( do_test ) then
!
        allocate( a2(m,n), bd(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL bd_cmp2 TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!               a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER BIDIAGONAL MATRIX.
!   bd_cmp2 USES THE ONE_SIDED RHALA-BARLOW ALGORITM AND IS FASTER THAN
!   THE STANDARD BIDIAGONALIZATION ALGORITHM USED IN bd_cmp. HOWEVER, A
!   A LOSS OF ORTHOGONALITY CAN BE EXPECTED FOR Q WHEN THE MATRIX a IS SINGULAR
!   OR HAS A LARGE CONDITION NUMBER SINCE Q IS COMPUTED FROM A RECURRENCE RELATIONSHIP.
!   THIS LOSS OF ORTHOGONALITY CAN BE PARTLY CORRECTED BY SPECIFYING THE OPTIONAL LOGICAL
!   ARGUMENT reortho WITH THE VALUE true IN THE CALL TO bd_cmp2 (THIS IS THE DEFAULT VALUE
!   OF THIS ARGUMENT).
!
    call bd_cmp2( a, d, e, p, failure=failure, reortho=reortho )
!
!   ON OUTPUT OF bd_cmp2:
!
!       a CONTAINS THE FIRST min(n,m) COLUMNS OF Q
!       AND p CONTAINS THE ORTHOGONAL MATRIX P.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS
!                         OF ORTHOGONALITY FOR Q CAN BE EXPECTED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION Q**(t)*a - BD*P**(t),
!
        bd(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
            bd(l,l)       = d(l)
            bd(l,l+1_i4b) = e(l+1_i4b)
        end do
!
        bd(n,n) = d(n)
!
        resid(:n,:n) = matmul( transpose(a(:m,:n)), a2(:m,:n) )    &
                        - matmul( bd(:n,:n), transpose(p(:n,:n)) )
!
        bd(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( bd(:n,1_i4b) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose( a ), a ) )
        err2 = maxval( resid(:n,:n) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - P**(t)*P.
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose( p ), p ) )
        err3 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, bd, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
!    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed bidiagonal decomposition a = Q*BD*P**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed Q orthogonal matrix                 = ', err2
        write (prtunit,*) 'Orthogonality of the computed P orthogonal matrix                 = ', err3
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_cmp2() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the bidiagonal reduction of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_cmp2
! ==========================
!
end program ex1_bd_cmp2

ex1_bd_cmp3.F90

program ex1_bd_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_CMP3
!   in module SVD_Procedures for computing an accurate eigenvalue decomposition of
!   a real symmetric matrix product, a**(t)*a, using the one-sided ralha-barlow
!   bidiagonalization algorithm applied directly to matrix a .
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, bd_cmp3, &
                         bd_svd, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE MATRIX WHOSE
! SVD MUST BE COMPUTED, m MUST BE GREATER THAN n, OTHERWISE
! bd_cmp3 WILL STOP WITH AN ERROR MESSAGE.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=2000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character, parameter :: sort='d'
!
    character(len=*), parameter :: name_proc='Example 1 of bd_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e
    real(stnd), allocatable, dimension(:,:) :: a, at, ata, resid
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure, failure_bd
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENDECOMPOSITION OF A REAL SYMMETRIC MATRIX PRODUCT,
!               a**(t)*a, USING THE ONE-SIDED RALHA BIDIAGONALIZATION
!               METHOD APPLIED TO MATRIX a .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( ata(n,n), at(n,m), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        at(:n,:m) = transpose( a(:m,:n) )
!
!       COMPUTE THE SYMMETRIC MATRIX CROSS-PRODUCT a**(t)*a .
!
        ata(:n,:n) = matmul( at(:n,:m), a(:m,:n) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL bd_cmp3 TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!                      a = Q*BD*P**(t)
!
!   WHERE P AND Q ARE ORTHOGONAL AND BD IS AN UPPER BIDIAGONAL MATRIX.
!
!   ON ENTRY OF bd_cmp3, a MUST CONTAINS THE INITIAL m-by-n MATRIX.
!   THE ORTHOGONAL MATRIX P IS COMPUTED IF THE LOGICAL ARGUMENT gen_p IS SET TO true.
!   THE ORTHOGONAL MATRIX P IS STORED IN FACTORED FORM IF THE LOGICAL ARGUMENT gen_p IS
!   SET TO false.
!
    call bd_cmp3( a(:m,:n), d(:n), e(:n), gen_p=true, failure=failure_bd )
!
!   ON EXIT OF bd_cmp3:
!
!         ARGUMENTS d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND
!         OFF-DIAGONAL ELEMENTS OF THE BIDIAGONAL MATRIX BD.
!
!         IF THE LOGICAL ARGUMENT gen_p IS SET TO false ON ENTRY,
!         THE LEADING n-BY-n LOWER TRIANGULAR PART OF a IS OVERWRITTEN
!         BY THE MATRIX P AS A PRODUCT OF ELEMENTARY REFLECTORS.
!
!         IF THE LOGICAL ARGUMENT gen_p IS SET TO false ON ENTRY,
!         THE LEADING n-BY-n PART OF a IS OVERWRITTEN
!         BY THE MATRIX P.
!
!         Q IS NOT COMPUTED BY bd_cmp3.
!
!   COMPUTE ONLY SINGULAR VALUES AND RIGHT SINGULAR VECTORS OF MATRIX a
!   WITH SUBROUTINE bd_svd:
!
!                            a = V*D*U**(t)
!
!   WHERE V AND U ARE THE LEFT AND RIGHT SINGULAR VECTORS, RESPECTIVELY, AND
!   D IS A DIAGONAL MATRIX, WITH SINGULAR VALUES ON THE DIAGONAL.
!
    call bd_svd( false, d(:n), e(:n), failure, a(:n,:n), sort=sort )
!
!   ON EXIT OF bd_svd :
!
!         ARGUMENTS d AND a(:n,:n) CONTAIN, RESPECTIVELY, THE SINGULAR VALUES AND
!         RIGHT SINGULAR VECTORS OF MATRIX a. LEFT SINGULAR VECTORS ARE NOT COMPUTED
!         WITH THIS CALL OF bd_svd.
!
!   COMPUTE EIGENVALUES OF a**(t)*a FROM THE SINGULAR VALUES OF a .
!
    d(:n) = d(:n)*d(:n)
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION (a**(t)*a)*U - U*D
!
        resid(:n,:n) = matmul( ata(:n,:n), a(:n,:n)  )    &
                       - a(:n,:n)*spread( d(:n), 1, n )
!
        e(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( e(:n) )/( norm( ata )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( ata(:n,:n) )
!
        at(:n,:n) = transpose( a(:n,:n) )
!
        resid(:n,:n) = abs( ata(:n,:n) - matmul( at(:n,:n), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( ata, resid, at )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigendecomposition a**(t)*a = U*D*U**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors U**(t)*U - I           = ', err2
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from bd_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from bd_svd()  ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the eigendecomposition of a ',       &
       n, ' by ', n,' real matrix cross-product is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_cmp3
! ==========================
!
end program ex1_bd_cmp3

ex1_bd_coef.F90

program ex1_bd_coef
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions BD_COEF,
!   FREQ_FUNC and SYMLIN_FILTER in module Time_Series_Procedures for filtering a time
!   series in a specific frequency band with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, half, one, two, pi, true, false, merror, allocate_error,   &
                         bd_coef, freq_func, symlin_filter, init_fft, fft, end_fft
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=2001
!
    character(len=*), parameter :: name_proc='Example 1 of bd_coef'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, tmp, fch, fcl
    real(stnd), dimension(n)              :: y, y2, y3, freqr
    real(stnd), dimension(:), allocatable :: coef
!
    complex(stnd), dimension(n) :: yc
!
    integer(i4b) :: i, k, k1, k2, pl, ph, nfilt, khalf, kmid
!   
    integer :: iok
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
!
!   pl IS THE MINIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   ph IS THE MAXIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   pl AND ph ARE EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pl=6(18) and ph=32(96) SELECT
!   PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pl  = 35
    ph  = 96
!
!   COMPUTE THE CORRESPONDING CUTOFF FREQUENCIES.
!
    fch = one/real( ph, stnd )
    fcl = one/real( pl, stnd )
!
!   NOW SELECT THE OPTIMAL NUMBER OF FILTER COEFFICIENTS k FOR THE LANCZOS FILTER.
!
    k1  = ceiling( one/(half-fcl) )
    k2  = ceiling(  2.6/(fcl-fch) )
    k   = max( k1, k2, ph+1 )
    if ( (k/2)*2==k  ) k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION bd_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- BAND PASS FILTER WITH CUTOFF PERIODS PL AND PH (E.G. CUTOFF FREQUENCIES 1/PL AND 1/PH).
!
    coef(:k) = bd_coef( pl=pl, ph=ph, k=k )
!
!   SUBROUTINE symlin_filter PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter FILTERS THE TIME SERIES, KEEPING THE PERIODS BETWEEN pl AND ph .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE LOST FROM EACH END OF THE SERIES SO THAT
!   NFILT (NFILT= size(VEC) - size(COEF) + 1) TIME OBSERVATIONS ARE RETURNED IN VEC(:NFILT)
!   AND THE REMAINING OBSERVATIONS ARE SET TO ZERO.
!
    call symlin_filter( vec=y2(:n), coef=coef(:k),  nfilt=nfilt )
!
!   NOW COMPUTE THE TRANSFERT FUNCTION freqr(:) OF THE LANCZOS FILTER AT THE FOURIER FREQUENCIES.
!
    call freq_func( nfreq=n, coef=coef(:k), freqr=freqr(:n), four_freq=true )
!
!   NOW, APPLY THE FILTER USING THE FFT AND THE CONVOLUTION THEOREM.
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   TRANSFORM THE TIME SERIES.
!
    yc(1:n) = cmplx( y(1:n), zero, kind=stnd )
!
    call fft( yc(:n), forward=true  )
!
!   MULTIPLY THE FOURIER TRANSFORM OF THE TIME SERIES
!   BY THE TRANSFERT FUNCTION OF THE FILTER.
!
    yc(:n) = yc(:n)*freqr(:n)
!
!   INVERT THE SEQUENCE BACK TO GET THE FILTERED TIME SERIES.
!
    call fft( yc(:n), forward=false )
!
    y3(:n) = real( yc(:n),  kind=stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    call end_fft()
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(:nfilt)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_bd_coef
! ==========================
!
end program ex1_bd_coef

ex1_bd_coef2.F90

program ex1_bd_coef2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions BD_COEF2
!   and SYMLIN_FILTER2 in module Time_Series_Procedures for filtering a time
!   series in a specific frequency band with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, merror, allocate_error,   &
                         bd_coef2, symlin_filter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=2000
!
    character(len=*), parameter :: name_proc='Example 1 of bd_coef2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err
    real(stnd), dimension(n)              :: y, y2, y3
    real(stnd), dimension(:), allocatable :: coef
!
    integer(i4b) :: k, k1, k2, pl, ph, khalf, kmid
!   
    integer :: iok
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
!   pl IS THE MINIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   ph IS THE MAXIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   pl AND ph ARE EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pl=6(18) and ph=32(96) SELECT
!   PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pl  = 35
    ph  = 96
!
!   NOW SELECT THE NUMBER OF FILTER COEFFICIENTS k FOR THE HAMMING FILTER.
!
    k = ph + 1
    if ( (k/2)*2==k )  k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION bd_coef2 COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- BAND PASS FILTER WITH CUTOFF PERIODS PL AND PH (E.G. CUTOFF FREQUENCIES 1/PL AND 1/PH).
!
    coef(:k) = bd_coef2( pl=pl, ph=ph, k=k )
!
!   SUBROUTINE symlin_filter2 PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter2 FILTERS THE TIME SERIES, KEEPING THE PERIODS BETWEEN pl AND ph .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE AFFECTED BY END EFFECTS  FROM EACH END OF THE SERIES.
!
    call symlin_filter2( vec=y2(:n), coef=coef(:k) )
!
!   FILTER AGAIN THE TIME SERIES, BUT COMPUTE THE VALUES AT THE ENDS OF THE OUTPUT
!   BY ASSUMING THAT THE INPUT IS PART OF A PERIODIC SEQUENCE OF PERIOD n .
!
    call symlin_filter2( vec=y3(:n), coef=coef(:k), usefft=true )
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(k1:k2)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_bd_coef2
! ===========================
!
end program ex1_bd_coef2

ex1_bd_deflate.F90

program ex1_bd_deflate
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DEFLATE
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a bidiagonal matrix by a (Godunov) deflation method.                                                                        
!
!   The computations are parallelized if OpenMP is used and an highly efficient variant
!   of the deflation algorithm is used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_SINGVAL in module SVD_Procedures
!    for computing singular values of a real bidiagonal matrix.
!
!
! LATEST REVISION : 09/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, c50, safmin, bd_deflate,    &
                         bd_singval, unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, nsing=100, max_qr_steps=3_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_deflate'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, normbd, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diag, sup, singval, resid2
    real(stnd), allocatable, dimension(:,:) :: leftvec, rightvec, resid
!
    integer(i4b) :: nsing2
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure1, failure2, bd_is_upper, ortho, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A REAL BIDIAGONAL MATRIX BD USING A BISECTION ALGORITHM
!               FOR SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    abstol = sqrt(safmin)
    eps    = fudge*epsilon( err )
    err    = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE DEFLATION ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( diag(n), sup(n), singval(n),                    &
              leftvec(n,nsing), rightvec(n,nsing), stat=iok   )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX BD.
!   THE DIAGONAL ELEMENTS ARE STORED IN diag.
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup.
!
    bd_is_upper  = true
!
    sup(1_i4b)   = zero
!
    diag(:n)     = half
    sup(2_i4b:n) = half
!
!    call random_number( diag(:n) )
!    call random_number( sup(2_i4b:n) )
!
!    diag(1_i4b)   = 1._stnd
!    diag(2_i4b:n) = 200._stnd*epsilon( err )
!    sup(2_i4b:n)  = 200._stnd*epsilon( err )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   FIRST COMPUTE THE LARGEST nsing SINGULAR VALUES OF THE BIDIAGONAL MATRIX BD BY A BISECTION METHOD.
!
!   ON ENTRY OF bd_singval :
!
!         diag(:n) MUST CONTAIN THE DIAGONAL ELEMENTS OF THE BIDIAGONAL MATRIX.
!         sup(:n)  MUST CONTAIN THE OFF-DIAGONAL ELEMENTS OF THE BIDIAGONAL MATRIX (sup(1) IS ARBITRARY).
!
!         THE OPTIONAL ARGUMENT abstol OF bd_singval MAY BE USED TO INDICATE THE REQUIRED PRECISION FOR THE
!         SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN
!         AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY
!         WHEN abstol IS SET TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
!         THE OPTIONAL ARGUMENT ls MAY BE USED TO INDICATE THE NUMBER OF SINGULAR VALUES TO BE COMPUTED.
!
    call bd_singval( diag(:n), sup(:n), nsing2, singval(:n), failure=failure1, sort=sort, abstol=abstol, ls=nsing  )
!
!   ON EXIT OF bd_singval :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE BISECTION ALGORITHM.
!
!         THE COMPUTED SINGULAR VALUES ARE STORED IN THE FIRST nsing2 ELEMENTS OF THE ARRAY singval IN
!         DECREASING (sort='d') OR ASCENDING (sort='a') ORDER. NOTE THAT nsing2 MAY BE GREATER THAN
!         ARGUMENT ls IN CASE OF MULTIPLE SINGUAR VALUES.
!
!   NEXT COMPUTE THE FIRST nsing SINGULAR VECTORS OF THE BIDIAGONAL MATRIX BD
!   BY A DEFLATION TECHNIQUE WITH SUBROUTINE bd_deflate.
!
!   ON ENTRY OF bd_deflate:
!
!         bd_is_upper INDICATES IF THE BIDIAGONAL MATRIX IS UPPER OR LOWER BIDIAGONAL.
!
!         diag(:n) MUST CONTAIN THE DIAGONAL ELEMENTS OF THE BIDIAGONAL MATRIX.
!         sup(:n)  MUST CONTAIN THE OFF-DIAGONAL ELEMENTS OF THE BIDIAGONAL MATRIX (sup(1) IS ARBITRARY).
!
!         PARAMETER singval CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd.
!         THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!         OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!         COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!         (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!         SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    call bd_deflate( bd_is_upper, diag(:n), sup(:n), singval(:nsing), leftvec(:n,:nsing), rightvec(:n,:nsing),  &
                     failure=failure2, ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF bd_deflate :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT IN THE DEFLATION ALGORITHM
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE DEFLATION ALGORITHM.
!
!         leftvec AND rightvec CONTAIN, RESPECTIVELY, THE nsing LEFT AND RIGHT
!         SINGULAR VECTORS OF THE BIDIAGONAL MATRIX BD ASSOCIATED WITH THE SINGULAR VALUES singval(:nsing).
!
!         bd_deflate MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER singval ARE NEARLY
!         IDENTICAL FOR SOME PATHOLOGICAL BIDIAGONAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resid2(nsing), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION BD*rightvec - leftvec*diag(singval(:nsing)),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX BD.
!
        if ( bd_is_upper ) then
!
            resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*rightvec                 + &
                               eoshift( spread(sup(:n), dim=2,ncopies=nsing)*rightvec, shift=1 )        - &
                               spread( singval(:nsing), dim=1, ncopies=n )*leftvec

        else
!
            resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*leftvec                   + &
                               eoshift( spread(sup(:n), dim=2,ncopies=nsing)*leftvec, shift=1 )          - &
                               spread( singval(:nsing), dim=1, ncopies=n )*rightvec
!
        end if
!
        resid2(:nsing) = norm( resid(:n,:nsing), dim=2_i4b )
!
        normbd = sum( diag(:n)*diag(:n) + sup(2_i4b:n)*sup(2_i4b:n) )
        err1 = maxval( resid2(:nsing) )/( normbd*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - leftvec**(t)*leftvec
!       WHERE leftvec ARE THE LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( resid(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( resid(:nsing,:nsing) - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval(resid(:nsing,:nsing))/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( resid(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( resid(:nsing,:nsing) - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval(resid(:nsing,:nsing))/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, resid2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( diag, sup, singval, leftvec, rightvec )
!
!   PRINT RESULTS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_deflate
! =============================
!
end program ex1_bd_deflate

ex1_bd_deflate2.F90

program ex1_bd_deflate2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DEFLATE2
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by a (Godunov) deflation technique.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP and BD_SINGVAL2 in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix and all or selected singular values of
!    a real bidiagonal matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_5, c1_e6, &
                         safmin, norm, unit_matrix, merror, bd_cmp, bd_deflate2, bd_singval2,  &
                         allocate_error, random_seed_, random_number_, gen_random_mat
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, nsing=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of bd_deflate2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, tauq, taup, tauo
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid, rla
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: ns, max_qr_steps, mnthr, i, mat_type
!
    logical(lgl) :: failure1, failure2, ortho, do_test, two_stage
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A REAL MATRIX USING A BISECTION ALGORITHM FOR SINGULAR VALUES
!               AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   CHOOSE TUNING PARAMETERS FOR THE DEFLATION ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    max_qr_steps = 4_i4b
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing), rla(mn,mn),    &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), tauo(mn), stat=iok )
    else
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),      &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE nsing SINGULAR VALUES OF a AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD
!   DECOMPOSITION OF a) IN THREE STEPS:
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup, rla AND tauo.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn), tauo=tauo(:mn) )
!        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn) )
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn) )
!
    end if
!
!   STEP2 : COMPUTE nsing SINGULAR VALUES OF THE BIDIAGONAL MATRIX (WHICH ARE ALSO THE
!   SINGULAR VALUES OF a) WITH SUBROUTINE bd_singval2 TO HIGH RELATIVE PRECISION.
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d').
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval2 MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
    call bd_singval2( d(:mn), e(:mn), ns, s(:mn), failure=failure1, sort=sort, &
                      abstol=abstol, ls=nsing )
!
!   STEP3 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
!   ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!   MATRIX STORED IN VECTORS d AND e. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    if ( two_stage ) then
!
        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), d(:mn), e(:mn), s(:nsing),    &
                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2, tauo=tauo(:mn),  &
                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
!        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), d(:mn), e(:mn), s(:nsing),   &
!                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,                 &
!                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
    else
!
        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
    end if
!
!   ON EXIT OF bd_deflate2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                         DEFLATION ALGORITHM.

!       leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!       SINGULAR VECTORS OF a .
!
!   bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
!
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( a2(:nsing,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
!
        deallocate( a, leftvec, rightvec, rla, s, d, e, tauq, taup, tauo )
!
    else
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval2() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() ) = ', failure2
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_deflate2
! ==============================
!
end program ex1_bd_deflate2

ex1_bd_deflate2_bis.F90

program ex1_bd_deflate2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DEFLATE2
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by a (Godunov) deflation technique.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP and BD_SINGVAL in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix and all or selected singular values of
!    a real bidiagonal matrix.
!
!
! LATEST REVISION : 14/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_5, c1_e6, &
                         safmin, norm, unit_matrix, merror, bd_cmp, bd_deflate2, bd_singval,   &
                         allocate_error, random_seed_, random_number_, gen_random_mat
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, nsing=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of bd_deflate2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, tauq, taup, tauo
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid, rla
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: ns, max_qr_steps, mnthr, i, mat_type
!
    logical(lgl) :: failure1, failure2, ortho, do_test, two_stage
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A REAL MATRIX USING A BISECTION ALGORITHM FOR SINGULAR VALUES
!               AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps    = fudge*epsilon( err )
    abstol = sqrt( safmin )
    err    = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    max_qr_steps = 4_i4b
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing), rla(mn,mn),    &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), tauo(mn), stat=iok )
    else
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),      &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE nsing SINGULAR VALUES OF a AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD
!   DECOMPOSITION OF a) IN THREE STEPS:
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup, rla AND tauo.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn), tauo=tauo(:mn) )
!        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn) )
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn) )
!
    end if
!
!   STEP2 : COMPUTE nsing SINGULAR VALUES OF THE BIDIAGONAL MATRIX (WHICH ARE ALSO THE
!   SINGULAR VALUES OF a) WITH SUBROUTINE bd_singval TO HIGH RELATIVE PRECISION.
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d').
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
    call bd_singval( d(:mn), e(:mn), ns, s(:mn), failure=failure1, sort=sort, &
                     abstol=abstol, ls=nsing )
!
!   STEP3 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
!   ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!   MATRIX STORED IN VECTORS d AND e. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    if ( two_stage ) then
!
        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), d(:mn), e(:mn), s(:nsing),    &
                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2, tauo=tauo(:mn),  &
                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
!        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), d(:mn), e(:mn), s(:nsing),   &
!                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,                 &
!                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
    else
!
        call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                          leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                          ortho=ortho, max_qr_steps=max_qr_steps                       )
!
    end if
!
!   ON EXIT OF bd_deflate2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                         DEFLATION ALGORITHM.

!       leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!       SINGULAR VECTORS OF a .
!
!   bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
!
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( a2(:nsing,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
!
        deallocate( a, leftvec, rightvec, rla, s, d, e, tauq, taup, tauo )
!
    else
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval() )  = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() ) = ', failure2
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_deflate2
! ==============================
!
end program ex1_bd_deflate2

ex1_bd_deflate2_ter.F90

program ex1_bd_deflate2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DEFLATE2
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by a (Godunov) deflation technique.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine SVD_CMP in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix and all singular values of
!    a real bidiagonal matrix.
!
!
! LATEST REVISION : 14/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp, bd_deflate2, norm, unit_matrix, merror, allocate_error, &
                         random_seed_, random_number_, gen_random_mat

#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, nsing=10
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of bd_deflate2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, tauq, taup
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: max_qr_steps, i, mat_type
!
    logical(lgl) :: failure1, failure2, ortho, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A REAL MATRIX USING THE BIDIAGONAL QR ALGORITHM FOR SINGULAR VALUES
!               AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE DEFLATION ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),      &
              s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF a AND
!   ONLY nsing LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION
!   OF a) IN TWO STEPS:
!
!   STEP1 : COMPUTE ALL SINGULAR VALUES OF a AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE svd_cmp.
!
    call svd_cmp( a(:n,:m), s(:mn), failure=failure1, sort=sort, d=d(:mn),   &
                  e=e(:mn), tauq=tauq(:mn), taup=taup(:mn)  )
!
!   ON EXIT OF svd_cmp :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT
!         failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                           THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                           SVD OF AN INTERMEDIATE BIDIAGONAL FORM BD OF a.
!
!         s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!         IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!         IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   HERE, SORT = 'd' IS USED. THIS IS REQUIRED FOR THE USE OF bd_inviter2.
!                
!   IF THE PARAMETER v IS ABSENT IN THE CALL OF svd_cmp, svd_cmp COMPUTES ONLY THE            
!   SINGULAR VALUES OF a AND, OPTIONALLY, STORES THE INTERMEDIATE BIDIAGONAL FORM
!   OF a AND THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN THE OPTIONAL ARGUMENTS d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d AND e ARE STORED 
!   IN mat, tauq, taup, WHEN THE OPTIONAL ARGUMENTS tauq AND taup ARE PRESENT.
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
!   ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!   MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                      leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                      ortho=ortho, max_qr_steps=max_qr_steps                       )
!
!   ON EXIT OF bd_deflate2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                         DEFLATION ALGORITHM.

!       leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!       SINGULAR VECTORS OF a .
!
!   bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
!
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( a2(:nsing,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from svd_cmp() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() ) = ', failure2
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_deflate2
! ==============================
!
end program ex1_bd_deflate2

ex1_bd_dqds.F90

program ex1_bd_dqds
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DQDS
!   in module SVD_Procedures for computing all singular values of a n-by-n bidiagonal matrix.
!   The singular values are computed by the the differential quotient difference with
!   shifts (dqds) algorithm to high relative accuracy, in the absence of denormalization,
!   underflow and overflow.
!
!   Note that BD_DQDS is slower than BD_LASQ1 subroutine, which is also based on dqds, but
!   may be more accurate for some real bidiagonal matrices.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER in module SVD_Procedures
!    fo computing singular vectors of a real bidiagonal matrix.
!
!
! LATEST REVISION : 08/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, quarter, one, two, five, c50, c200,  &
                         bd_inviter, bd_dqds, unit_matrix, norm, geop, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! ls IS THE NUMBER OF THE LARGEST SINGULAR VECTOR COUPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, ls=15, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_dqds'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, s, e2
    real(stnd), allocatable, dimension(:,:) :: id, resid, leftvec, rightvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: mat_type, j, i
!
    logical(lgl) :: failure1=false, failure2=false, bd_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : ALL SINGULAR VALUES AND ls SINGULAR VECTORS OF A n-BY-n REAL BIDIAGONAL MATRIX
!               bd BY THE DQDS-INVERSE ITERATION METHOD (E.G. PARTIAL SVD DECOMPOSITION).
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), s(n), e2(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX bd .
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    bd_is_upper = true
    e(1_i4b)    = zero
!
!   SPECIFY THE TYPE OF BIDIAGONAL MATRIX:
!
    mat_type = 9_i4b
!
!   GENERATE THE BIDIAGONAL MATRIX.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           bd IS A GRADED MATRIX.
!
            d(:n)      = geop( one, quarter, n )
            e(2_i4b:n) = d(:n-1_i4b)
!
        case( 2_i4b )
!
!           bd IS A TOEPLITZ MATRIX.
!
            d(:n)      = one
            e(2_i4b:n) = two
!
        case( 3_i4b )
!
            e(2_i4b:n) = one
            do j=1_i4b, n
	        d(j) = real( n + 1_i4b - j, stnd )
            end do
!
        case( 4_i4b )
!
            do j=1_i4b, n-1_i4b
	        d(j)       = real( n + 1_i4b - j, stnd )
                e(j+1_i4b) = d(j)/five
            end do
	    d(n) = one
!
        case( 5_i4b )
!
            do j=1_i4b, (n+1_i4b)/2_i4b
                d(j) = real( j, stnd )
            end do
!
            do j=(n+1_i4b)/2_i4b+1_i4b, n
                d(j) = real( n - j, stnd )
            end do
!
	    do j=1, n-1_i4b
                e(j+1_i4b) = d(j)/five
            end do
!
        case( 6_i4b )
!
            do j=1_i4b, n/2_i4b
                d(2_i4b*j-1_i4b) = real( n + 1_i4b - j, stnd )
                d(2_i4b*j)       = real( j, stnd )
            end do
!
	    do j=1_i4b, n-1_i4b
                e(j+1_i4b) = real( (n-j)/5_i4b, stnd )
	    end do
!
        case( 7_i4b )
!
            d(1_i4b)   = one
            d(2_i4b:n) = c200*epsilon( err )
            e(2_i4b:n) = c200*epsilon( err )
!
        case( 8_i4b )
!
            d(n) = 1e-20
            do j=n-1_i4b, 1_i4b, -1_i4b
	        d(j)       = d(j+1_i4b)*1.01_stnd
                e(j+1_i4b) = d(j)
            end do
!
        case( 9_i4b )
!
            do j=0_i4b, n/11_i4b-1_i4b
!
                do i=1_i4b, 6_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(i-1_i4b), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                do i=7_i4b,10_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(11_i4b-i), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                d(11_i4b*j+11_i4b) = one
!
                if ( 11_i4b*j+12_i4b<=n ) then
                    d(11_i4b*j+12_i4b) = one
                    e(11_i4b*j+12_i4b) = 1e-4
                end if
!
            end do
!
        case default
!
!           bd IS A UNIFORM RANDOM MATRIX.
!
            call random_number( d(:n) )
            call random_number( e(2_i4b:n) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( d ) ) then
!
        if ( .not.all( ieee_is_normal( d ) ) .and. .not.all( ieee_is_normal( e ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input bidiagonal matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    s(:n)  = d(:n)
    e2(:n) = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-n BIDIAGONAL MATRIX bd
!   IS WRITTEN
!
!                       bd = u * s * v**(t)
!
!   WHERE s IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   n DIAGONAL ELEMENTS, u AND v ARE n-BY-n ORTHOGONAL MATRICES.
!   THE DIAGONAL ELEMENTS OF s ARE THE SINGULAR VALUES OF bd; THEY ARE
!   REAL AND NON-NEGATIVE.
!   THE COLUMNS OF u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF bd AND
!   ITS LEADING LEFT AND RIGHT SINGULAR VECTORS (E.G. A PARTIAL SVD DECOMPOSITION OF bd)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE ALL THE SINGULAR VALUES OF bd BY THE DQDS ALGORITHM
!   WITH SUBROUTINE bd_dqds.
!
!   ON ENTRY OF bd_dqds, s(:n) AND e2(:n) CONTAIN, RESPECTIVELY, THE DIAGONAL AND SUBDIAGONAL
!   ELEMENTS OF THE BIDIAGONAL MATRIX.
!
    call bd_dqds( s(:n), e2(:n), failure=failure1, sort=sort )
!
!   ON EXIT OF bd_dqds :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE DQDS ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF bd.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter IN THE SECOND STEP.
!
!   STEP2 : COMPUTE THE ls LEADING LEFT AND RIGHT SINGULAR VECTORS OF bd BY INVERSE
!   ITERATION WITH SUBROUTINE bd_inviter :
!
    if ( ls>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,ls),  &
                  rightvec(n,ls), &
                  stat = iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST ls SINGULAR VECTORS OF bd BY maxiter INVERSE ITERATIONS WITH
!       SUBROUTINE bd_inviter .
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), s(:ls), leftvec(:n,:ls), rightvec(:n,:ls),  &
                         failure=failure2, maxiter=maxiter )
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd .
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       ON EXIT OF bd_inviter :
!
!          failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM.
!          failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS.
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF bd ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM OR A BLOCKED QR ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST ls LEFT AND RIGHT SINGULAR VECTORS OF bd, RESPECTIVELY.
!
!       bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY IDENTICAL.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. ls>0  .and. .not.failure1 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(ls,ls), resid(n,ls), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*v - u*singval(:ls),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd .
!
        if ( bd_is_upper ) then
!
            resid(:n,:ls) = spread( d(:n),         dim=2, ncopies=ls )*rightvec                 + &
                            eoshift( spread(e(:n), dim=2,ncopies=ls)*rightvec, shift=1 )        - &
                            spread( s(:ls),     dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:ls) = spread( d(:n),         dim=2, ncopies=ls )*leftvec                   + &
                            eoshift( spread(e(:n), dim=2,ncopies=ls)*leftvec, shift=1 )          - &
                            spread( s(:ls),     dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:ls,1_i4b) = norm( resid(:n,:ls), dim=2_i4b )
        err1 = maxval( id(:ls,1_i4b) )/( sum( s(:ls) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        call unit_matrix( id )
!
        resid(:ls,:ls) = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:ls,:ls) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        resid(:ls,:ls) = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:ls,:ls) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( ls>0 .and. .not.failure1 ) then
        deallocate( d, e, s, e2, leftvec, rightvec )
    else
        deallocate( d, e, s, e2 )
    end if
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_dqds() )   = ', failure1
    if ( ls>0 .and. .not.failure1 ) then
        write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure2
    end if
!
    if ( do_test .and. ls>0 .and. .not.failure1 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', ls
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and', ls, ' singular vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_dqds
! ==========================
!
end program ex1_bd_dqds

ex1_bd_inviter.F90

program ex1_bd_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_INVITER
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a bidiagonal matrix by an inverse iteration method.                                                                        
!
!   The computations are parallelized if OpenMP is used and an highly efficient variant
!   of the inverse iteration algorithm is used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_SVD in module SVD_Procedures
!    for computing singular values of a real bidiagonal matrix.
!
!
! LATEST REVISION : 14/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, c100, bd_inviter, bd_svd, &
                         unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, nsing=3000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 1 of bd_inviter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diag, sup, sup2, singval
    real(stnd), allocatable, dimension(:,:) :: id, resid, leftvec, rightvec
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure1, failure2, bd_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL OR FULL SVD OF A REAL n-BY-n BIDIAGONAL MATRIX BD USING THE GOLUB-REINSCH
!               QR IMPLICIT SHIFT ALGORITHM FOR ALL SINGULAR VALUES AND THE INVERSE ITERATION
!               METHOD FOR SELECTED SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( diag(n), sup(n), sup2(n), singval(n),           &
              leftvec(n,nsing), rightvec(n,nsing), stat=iok   )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX BD.
!   THE DIAGONAL ELEMENTS ARE STORED IN diag .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup .
!
    bd_is_upper   = true
!
    sup(1_i4b)   = zero
!
!    diag(:n)     = half
!    sup(2_i4b:n) = half
!
    call random_number( diag(:n) )
    call random_number( sup(2_i4b:n) )
!
!    diag(1_i4b)   = 1._stnd
!    diag(2_i4b:n) = 200._stnd*epsilon( err )
!    sup(2_i4b:n)  = 200._stnd*epsilon( err )
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    singval(:n) = diag(:n)
    sup2(:n)    = sup(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL SINGULAR VALUES OF THE BIDIAGONAL MATRIX BD .
!   THE SINGULAR VALUES ARE STORED IN singval IN DECREASING ORDER (sort='d').
!
    call bd_svd( bd_is_upper, singval(:n), sup2(:n), failure=failure1, sort=sort  )
!
!   ON EXIT OF bd_svd :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE BIDIAGONAL QR ALGORITHM.
!
!   COMPUTE THE FIRST nsing SINGULAR VECTORS OF BD BY maxiter INVERSE ITERATIONS WITH
!   SUBROUTINE bd_inviter. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call bd_inviter( bd_is_upper, diag(:n), sup(:n), singval(:nsing), leftvec(:n,:nsing), rightvec(:n,:nsing),  &
                     failure=failure2, maxiter=maxiter )
!
!   ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd .
!   THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   ON EXIT OF bd_inviter :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM
!
!         leftvec AND rightvec CONTAIN, RESPECTIVELY, THE nsing LEFT AND RIGHT SINGULAR
!         VECTORS OF THE BIDIAGONAL MATRIX BD ASSOCIATED WITH THE SINGULAR VALUES singval(:nsing).
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL MATRIX BD ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF BD ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!   ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF BD, RESPECTIVELY.
!
!   bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER singval ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL BIDIAGONAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(nsing,nsing), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*rightvec - leftvec*diag(singval(:nsing)),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        if ( bd_is_upper ) then
!
            resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*rightvec                 + &
                               eoshift( spread(sup(:n), dim=2,ncopies=nsing)*rightvec, shift=1 )        - &
                               spread( singval(:nsing), dim=1, ncopies=n )*leftvec

        else
!
            resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*leftvec                   + &
                               eoshift( spread(sup(:n), dim=2,ncopies=nsing)*leftvec, shift=1 )          - &
                               spread( singval(:nsing), dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( id(:nsing,1_i4b) )/( sum( singval(:nsing) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - leftvec**(t)*leftvec
!       WHERE leftvec ARE THE LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( id )
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( diag, sup, sup2, singval, leftvec, rightvec )
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_svd() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_inviter
! =============================
!
end program ex1_bd_inviter

ex1_bd_inviter2.F90

program ex1_bd_inviter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_INVITER2
!   in module SVD_Procedures for computing a full or partial SVD of a real matrix using
!   the Chan-Golub-Reinsch bidiagonal reduction algorithm, a bisection algorithm for
!   singular values and the inverse iteration method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, bisection and inverse iteration
!   algorithms are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP and BD_SINGVAL2 in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix and all or selected singular values of
!    a real bidiagonal matrix.
!                                                                              
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, safmin, bd_cmp, bd_inviter2, bd_singval2, &
                         norm, unit_matrix, zero, one, seven, c30, c50, c1_5, c1_e6, merror,   &
                         allocate_error, random_seed_, random_number_, gen_random_mat
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, &
                               nsing=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of bd_inviter2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0, d, e, tauq, taup, tauo
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid, rla
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: ns, mnthr, i, mat_type
!
    logical(lgl) :: failure1, failure2, do_test, two_stage
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : nsing SINGULAR VALUES AND VECTORS OF A n-BY-m REAL MATRIX
!               USING A BISECTION METHOD FOR THE SINGULAR VALUES AND THE INVERSE
!               ITERATION METHOD FOR THE SINGULAR VECTORS (e.g., A PARTIAL SVD DECOMPOSITION).
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing), rla(mn,mn),    &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), tauo(mn), stat=iok )
    else
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),      &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(mn), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:mn) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE nsing SINGULAR VALUES OF a AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD
!   DECOMPOSITION OF a) IN THREE STEPS:
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup, rla AND tauo.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn), tauo=tauo(:mn) )
!        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn) )
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn) )
!
    end if
!
!   STEP2 : COMPUTE nsing SINGULAR VALUES OF THE BIDIAGONAL MATRIX (WHICH ARE ALSO THE
!   SINGULAR VALUES OF a) WITH SUBROUTINE bd_singval2 TO HIGH RELATIVE PRECISION.
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d').
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval2 MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    call bd_singval2( d(:mn), e(:mn), ns, s(:mn), failure=failure1, sort=sort, &
                      abstol=abstol, ls=nsing )
!
!   ON EXIT OF bd_singval2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!
!   ns GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY bd_singval2.
!   ns MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT.
!   THE ns LARGEST SINGULAR VALUES ARE STORED IN THE FIRST ns POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   STEP3 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION
!   WITH SUBROUTINE bd_inviter2 .
!
!   ON ENTRY OF bd_inviter2, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX
!   STORED IN VECTORS d AND e. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    if ( two_stage ) then
!
        call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, failure=failure2, tauo=tauo, maxiter=maxiter )
!        call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, failure=failure2, maxiter=maxiter )
!
    else
!
        call bd_inviter2( a, tauq, taup, d, e, s(:nsing), leftvec, rightvec, failure=failure2, maxiter=maxiter )
!
    end if
!
!   ON EXIT OF bd_inviter2 :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!         failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF NECESSARY. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED 
!   BY BACK TRANSFORMATIONS. ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT
!   SINGULAR VECTORS OF a, RESPECTIVELY.
!
!   NOTE THAT bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( a2(:nsing,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
!
        deallocate( a, leftvec, rightvec, rla, s, d, e, tauq, taup, tauo )
!
    else
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval2() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter2() ) = ', failure2
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_inviter2
! ==============================
!
end program ex1_bd_inviter2

ex1_bd_inviter2_bis.F90

program ex1_bd_inviter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_INVITER2
!   in module SVD_Procedures for computing a full or partial SVD of a real matrix using
!   the Chan-Golub-Reinsch bidiagonal reduction algorithm, a bisection algorithm for
!   singular values and the inverse iteration method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, bisection and inverse iteration
!   algorithms are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP and BD_SINGVAL in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, safmin, bd_cmp, bd_inviter2, bd_singval, &
                         norm, unit_matrix, zero, one, seven, c30, c50, c1_5, c1_e6, merror,   &
                         allocate_error, random_seed_, random_number_, gen_random_mat
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, &
                               nsing=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of bd_inviter2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0, d, e, tauq, taup, tauo
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid, rla
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: ns, mnthr, i, mat_type
!
    logical(lgl) :: failure1, failure2, do_test, two_stage
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : nsing SINGULAR VALUES AND VECTORS OF A n-BY-m REAL MATRIX
!               USING A BISECTION METHOD FOR THE SINGULAR VALUES AND THE INVERSE
!               ITERATION METHOD FOR THE SINGULAR VECTORS (e.g., A PARTIAL SVD DECOMPOSITION).
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps  = fudge*epsilon( err )
    err  = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing), rla(mn,mn),    &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), tauo(mn), stat=iok )
    else
        allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),      &
                  s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(mn), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:mn) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE nsing SINGULAR VALUES OF a AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD
!   DECOMPOSITION OF a) IN THREE STEPS:
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup, rla AND tauo.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn), tauo=tauo(:mn) )
!        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn), rla(:mn,:mn) )
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION WITH SUBROUTINE bd_cmp.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN mat, tauq, taup.
!
        call bd_cmp( a(:n,:m), d(:mn), e(:mn), tauq(:mn), taup(:mn) )
!
    end if
!
!   STEP2 : COMPUTE nsing SINGULAR VALUES OF THE BIDIAGONAL MATRIX (WHICH ARE ALSO THE
!   SINGULAR VALUES OF a) WITH SUBROUTINE bd_singval TO HIGH RELATIVE PRECISION.
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d').
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    call bd_singval( d(:mn), e(:mn), ns, s(:mn), failure=failure1, sort=sort, &
                     abstol=abstol, ls=nsing )
!
!   ON EXIT OF bd_singval :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!
!   ns GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY bd_singval.
!   ns MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT.
!   THE ns LARGEST SINGULAR VALUES ARE STORED IN THE FIRST ns POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   STEP3 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION
!   WITH SUBROUTINE bd_inviter2 .
!
!   ON ENTRY OF bd_inviter2, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX
!   STORED IN VECTORS d AND e. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    if ( two_stage ) then
!
        call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, failure=failure2, tauo=tauo, maxiter=maxiter )
!        call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, failure=failure2, maxiter=maxiter )
!
    else
!
        call bd_inviter2( a, tauq, taup, d, e, s(:nsing), leftvec, rightvec, failure=failure2, maxiter=maxiter )
!
    end if
!
!   ON EXIT OF bd_inviter2 :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!         failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF NECESSARY. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED 
!   BY BACK TRANSFORMATIONS. ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT
!   SINGULAR VECTORS OF a, RESPECTIVELY.
!
!   NOTE THAT bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( a2(:nsing,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
!
        deallocate( a, leftvec, rightvec, rla, s, d, e, tauq, taup, tauo )
!
    else
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval() )  = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter2() ) = ', failure2
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_inviter2
! ==============================
!
end program ex1_bd_inviter2

ex1_bd_lasq1.F90

program ex1_bd_lasq1
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_LASQ1
!   in module SVD_Procedures for computing all singular values of a n-by-n bidiagonal matrix.
!   The singular values are computed by the the differential quotient difference with
!   shifts (dqds) algorithm to high relative accuracy, in the absence of denormalization,
!   underflow and overflow.
!
!   Note that BD_LASQ1 is usually faster than BD_SINGVAL and BD_SINGVAL2 subroutines, which
!   are based on bisection, for computing all singular values of a real bidiagonal matrix,
!   but that BD_LASQ1 does not allow computing selected singular values of a bidiagonal matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER in module SVD_Procedures
!    fo computing singular vectors of a real bidiagonal matrix.
!
!
! LATEST REVISION : 08/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, quarter, one, two, five, c50, c200,  &
                         bd_inviter, bd_lasq1, unit_matrix, norm, geop, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! ls IS THE NUMBER OF THE LARGEST SINGULAR VECTOR COUPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, ls=15, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_lasq1'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, s, e2
    real(stnd), allocatable, dimension(:,:) :: id, resid, leftvec, rightvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: mat_type, j, i, info
!
    logical(lgl) :: failure1=false, failure2=false, bd_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : ALL SINGULAR VALUES AND ls SINGULAR VECTORS OF A n-BY-n REAL BIDIAGONAL MATRIX
!               bd BY THE DQDS-INVERSE ITERATION METHOD (E.G. PARTIAL SVD DECOMPOSITION).
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), s(n), e2(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX bd .
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    bd_is_upper = true
    e(1_i4b)    = zero
!
!   SPECIFY THE TYPE OF BIDIAGONAL MATRIX:
!
    mat_type = 9_i4b
!
!   GENERATE THE BIDIAGONAL MATRIX.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           bd IS A GRADED MATRIX.
!
            d(:n)      = geop( one, quarter, n )
            e(2_i4b:n) = d(:n-1_i4b)
!
        case( 2_i4b )
!
!           bd IS A TOEPLITZ MATRIX.
!
            d(:n)      = one
            e(2_i4b:n) = two
!
        case( 3_i4b )
!
            e(2_i4b:n) = one
            do j=1_i4b, n
	        d(j) = real( n + 1_i4b - j, stnd )
            end do
!
        case( 4_i4b )
!
            do j=1_i4b, n-1_i4b
	        d(j)       = real( n + 1_i4b - j, stnd )
                e(j+1_i4b) = d(j)/five
            end do
	    d(n) = one
!
        case( 5_i4b )
!
            do j=1_i4b, (n+1_i4b)/2_i4b
                d(j) = real( j, stnd )
            end do
!
            do j=(n+1_i4b)/2_i4b+1_i4b, n
                d(j) = real( n - j, stnd )
            end do
!
	    do j=1, n-1_i4b
                e(j+1_i4b) = d(j)/five
            end do
!
        case( 6_i4b )
!
            do j=1_i4b, n/2_i4b
                d(2_i4b*j-1_i4b) = real( n + 1_i4b - j, stnd )
                d(2_i4b*j)       = real( j, stnd )
            end do
!
	    do j=1_i4b, n-1_i4b
                e(j+1_i4b) = real( (n-j)/5_i4b, stnd )
	    end do
!
        case( 7_i4b )
!
            d(1_i4b)   = one
            d(2_i4b:n) = c200*epsilon( err )
            e(2_i4b:n) = c200*epsilon( err )
!
        case( 8_i4b )
!
            d(n) = 1e-20
            do j=n-1_i4b, 1_i4b, -1_i4b
	        d(j)       = d(j+1_i4b)*1.01_stnd
                e(j+1_i4b) = d(j)
            end do
!
        case( 9_i4b )
!
            do j=0_i4b, n/11_i4b-1_i4b
!
                do i=1_i4b, 6_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(i-1_i4b), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                do i=7_i4b,10_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(11_i4b-i), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                d(11_i4b*j+11_i4b) = one
!
                if ( 11_i4b*j+12_i4b<=n ) then
                    d(11_i4b*j+12_i4b) = one
                    e(11_i4b*j+12_i4b) = 1e-4
                end if
!
            end do
!
        case default
!
!           bd IS A UNIFORM RANDOM MATRIX.
!
            call random_number( d(:n) )
            call random_number( e(2_i4b:n) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( d ) ) then
!
        if ( .not.all( ieee_is_normal( d ) ) .and. .not.all( ieee_is_normal( e ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input bidiagonal matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    s(:n)  = d(:n)
    e2(:n) = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-n BIDIAGONAL MATRIX bd
!   IS WRITTEN
!
!                       bd = u * s * v**(t)
!
!   WHERE s IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   n DIAGONAL ELEMENTS, u AND v ARE n-BY-n ORTHOGONAL MATRICES.
!   THE DIAGONAL ELEMENTS OF s ARE THE SINGULAR VALUES OF bd; THEY ARE
!   REAL AND NON-NEGATIVE.
!   THE COLUMNS OF u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF bd AND
!   ITS LEADING LEFT AND RIGHT SINGULAR VECTORS (E.G. A PARTIAL SVD DECOMPOSITION OF bd)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE ALL THE SINGULAR VALUES OF bd BY THE DQDS ALGORITHM
!   WITH SUBROUTINE bd_lasq1.
!
!   ON ENTRY OF bd_lasq1, s(:n) AND e2(:n) CONTAIN, RESPECTIVELY, THE DIAGONAL AND SUBDIAGONAL
!   ELEMENTS OF THE BIDIAGONAL MATRIX.
!
    call bd_lasq1( s(:n), e2(:n), failure=failure1, sort=sort, info=info )
!
!   ON EXIT OF bd_lasq1 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE DQDS ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF bd.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter IN THE SECOND STEP.
!
!   STEP2 : COMPUTE THE ls LEADING LEFT AND RIGHT SINGULAR VECTORS OF bd BY INVERSE
!   ITERATION WITH SUBROUTINE bd_inviter :
!
    if ( ls>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,ls),  &
                  rightvec(n,ls), &
                  stat = iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST ls SINGULAR VECTORS OF bd BY maxiter INVERSE ITERATIONS WITH
!       SUBROUTINE bd_inviter .
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), s(:ls), leftvec(:n,:ls), rightvec(:n,:ls),  &
                         failure=failure2, maxiter=maxiter )
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd .
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       ON EXIT OF bd_inviter :
!
!          failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM.
!          failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS.
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF bd ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM OR A BLOCKED QR ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST ls LEFT AND RIGHT SINGULAR VECTORS OF bd, RESPECTIVELY.
!
!       bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY IDENTICAL.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. ls>0  .and. .not.failure1 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(ls,ls), resid(n,ls), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*v - u*singval(:ls),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd .
!
        if ( bd_is_upper ) then
!
            resid(:n,:ls) = spread( d(:n),         dim=2, ncopies=ls )*rightvec                 + &
                            eoshift( spread(e(:n), dim=2,ncopies=ls)*rightvec, shift=1 )        - &
                            spread( s(:ls),     dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:ls) = spread( d(:n),         dim=2, ncopies=ls )*leftvec                   + &
                            eoshift( spread(e(:n), dim=2,ncopies=ls)*leftvec, shift=1 )          - &
                            spread( s(:ls),     dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:ls,1_i4b) = norm( resid(:n,:ls), dim=2_i4b )
        err1 = maxval( id(:ls,1_i4b) )/( sum( s(:ls) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        call unit_matrix( id )
!
        resid(:ls,:ls) = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:ls,:ls) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        resid(:ls,:ls) = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:ls,:ls) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( ls>0 .and. .not.failure1 ) then
        deallocate( d, e, s, e2, leftvec, rightvec )
    else
        deallocate( d, e, s, e2 )
    end if
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_lasq1() )   = ', failure1
    write (prtunit,*) ' INFO    ( from bd_lasq1() )   = ', info
    if ( ls>0 .and. .not.failure1 ) then
        write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure2
    end if
!
    if ( do_test .and. ls>0 .and. .not.failure1 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', ls
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and', ls, ' singular vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_lasq1
! ===========================
!
end program ex1_bd_lasq1

ex1_bd_singval.F90

program ex1_bd_singval
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SINGVAL
!   in module SVD_Procedures for computing singular values of a real n-by-n bidiagonal matrix.
!   The singular values are computed by the bisection method
!   and can be obtained to high relative accurary at the user choice.
!
!   Note that BD_SINGVAL is more accurate than BD_SINGVAL2 subroutine, which can also
!   be used for this purpose.                                                     
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER in module SVD_Procedures
!    fo computing singular vectors of a real bidiagonal matrix.
!
!
! LATEST REVISION : 08/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, quarter, one, two, five, c50, &
                         c200, safmin, bd_inviter, bd_singval, unit_matrix, norm, geop,   &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! ls IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, ls=20, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_singval'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, abstol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, s
    real(stnd), allocatable, dimension(:,:) :: id, resid, leftvec, rightvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: mat_type, j, i, nsing
!
    logical(lgl) :: failure1=false, failure2=false, bd_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : ls SINGULAR VALUES AND VECTORS OF A n-BY-n REAL BIDIAGONAL MATRIX bd
!               BY THE BISECTION-INVERSE ITERATION METHOD (E.G. PARTIAL SVD DECOMPOSITION).
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   FIRST CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), s(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX bd.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    bd_is_upper = true
    e(1_i4b)    = zero
!
!   SPECIFY THE TYPE OF BIDIAGONAL MATRIX:
!
    mat_type = 9_i4b
!
!   GENERATE THE BIDIAGONAL MATRIX.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           bd IS A GRADED MATRIX.
!
            d(:n)      = geop( one, quarter, n )
            e(2_i4b:n) = d(:n-1_i4b)
!
        case( 2_i4b )
!
!           bd IS A TOEPLITZ MATRIX.
!
            d(:n)      = one
            e(2_i4b:n) = two
!
        case( 3_i4b )
!
            e(2_i4b:n) = one
            do j=1_i4b, n
	        d(j) = real( n + 1_i4b - j, stnd )
            end do
!
        case( 4_i4b )
!
            do j=1_i4b, n-1_i4b
	        d(j)       = real( n + 1_i4b - j, stnd )
                e(j+1_i4b) = d(j)/five
            end do
	    d(n) = one
!
        case( 5_i4b )
!
            do j=1_i4b, (n+1_i4b)/2_i4b
                d(j) = real( j, stnd )
            end do
!
            do j=(n+1_i4b)/2_i4b+1_i4b, n
                d(j) = real( n - j, stnd )
            end do
!
	    do j=1, n-1_i4b
                e(j+1_i4b) = d(j)/five
            end do
!
        case( 6_i4b )
!
            do j=1_i4b, n/2_i4b
                d(2_i4b*j-1_i4b) = real( n + 1_i4b - j, stnd )
                d(2_i4b*j)       = real( j, stnd )
            end do
!
	    do j=1_i4b, n-1_i4b
                e(j+1_i4b) = real( (n-j)/5_i4b, stnd )
	    end do
!
        case( 7_i4b )
!
            d(1_i4b)   = one
            d(2_i4b:n) = c200*epsilon( err )
            e(2_i4b:n) = c200*epsilon( err )
!
        case( 8_i4b )
!
            d(n) = 1e-20
            do j=n-1_i4b, 1_i4b, -1_i4b
	        d(j)       = d(j+1_i4b)*1.01_stnd
                e(j+1_i4b) = d(j)
            end do
!
        case( 9_i4b )
!
            do j=0_i4b, n/11_i4b-1_i4b
!
                do i=1_i4b, 6_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(i-1_i4b), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                do i=7_i4b,10_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(11_i4b-i), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                d(11_i4b*j+11_i4b) = one
!
                if ( 11_i4b*j+12_i4b<=n ) then
                    d(11_i4b*j+12_i4b) = one
                    e(11_i4b*j+12_i4b) = 1e-4
                end if
!
            end do
!
        case default
!
!           bd IS A UNIFORM RANDOM MATRIX.
!
            call random_number( d(:n) )
            call random_number( e(2_i4b:n) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( d ) ) then
!
        if ( .not.all( ieee_is_normal( d ) ) .and. .not.all( ieee_is_normal( e ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input bidiagonal matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-n BIDIAGONAL MATRIX bd
!   IS WRITTEN
!
!                       bd = u * s * v**(t)
!
!   WHERE s IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   n DIAGONAL ELEMENTS, u AND v ARE n-BY-n ORTHOGONAL MATRICES.
!   THE DIAGONAL ELEMENTS OF s ARE THE SINGULAR VALUES OF bd; THEY ARE
!   REAL AND NON-NEGATIVE.
!   THE COLUMNS OF u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF bd AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS (E.G. A PARTIAL SVD DECOMPOSITION OF bd)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF bd BY BISECTION
!   WITH SUBROUTINE bd_singval.
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    call bd_singval( d(:n), e(:n), nsing, s(:n), failure=failure1, sort=sort, abstol=abstol, ls=ls )
!
!   ON EXIT OF bd_singval :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF bd.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY bd_singval.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter IN THE SECOND STEP.
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF bd BY INVERSE
!   ITERATION WITH SUBROUTINE bd_inviter :
!
    if ( nsing>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(n,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing SINGULAR VECTORS OF bd BY maxiter INVERSE ITERATIONS WITH
!       SUBROUTINE bd_inviter .
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), s(:nsing), leftvec(:n,:nsing), rightvec(:n,:nsing),  &
                         failure=failure2, maxiter=maxiter )
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd .
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       ON EXIT OF bd_inviter :
!
!          failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!          failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF bd ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF bd, RESPECTIVELY.
!
!       bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(nsing,nsing), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*v - u*singval(:nsing),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd .
!
        if ( bd_is_upper ) then
!
            resid(:n,:nsing) = spread( d(:n),         dim=2, ncopies=nsing )*rightvec                 + &
                               eoshift( spread(e(:n), dim=2,ncopies=nsing)*rightvec, shift=1 )        - &
                               spread( s(:nsing),     dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:nsing) = spread( d(:n),         dim=2, ncopies=nsing )*leftvec                   + &
                               eoshift( spread(e(:n), dim=2,ncopies=nsing)*leftvec, shift=1 )          - &
                               spread( s(:nsing),     dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( id(:nsing,1_i4b) )/( sum( s(:nsing) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        call unit_matrix( id )
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd .
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( nsing>0  .and. .not.failure1 ) then
        deallocate( d, e, s, leftvec, rightvec )
    else
        deallocate( d, e, s )
    end if
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval() ) = ', failure1
    if ( nsing>0  .and. .not.failure1 ) then
        write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure2
    end if
!
    if ( do_test .and. nsing>0  .and. .not.failure1 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_singval
! =============================
!
end program ex1_bd_singval

ex1_bd_singval2.F90

program ex1_bd_singval2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SINGVAL2
!   in module SVD_Procedures for computing singular values of a real n-by-n bidiagonal matrix.
!   The singular values are computed by the bisection method
!   and can be obtained to high relative accurary at the user choice.
!
!   Note that BD_SINGVAL2 is less accurate than BD_SINGVAL subroutine, which can also
!   be used for this purpose, but is faster.                                                     
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER in module SVD_Procedures
!    fo computing singular vectors of a real bidiagonal matrix.
!
!
! LATEST REVISION : 08/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, quarter, one, two, five, c50, &
                         c200, safmin, bd_inviter, bd_singval2, unit_matrix, norm, geop,  &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! ls IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, ls=10, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_singval2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, abstol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, s
    real(stnd), allocatable, dimension(:,:) :: id, resid, leftvec, rightvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: mat_type, j, i, nsing
!
    logical(lgl) :: failure1=false, failure2=false, bd_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : ls SINGULAR VALUES AND VECTORS OF A n-BY-n REAL BIDIAGONAL MATRIX bd
!               BY THE BISECTION-INVERSE ITERATION METHOD (E.G. PARTIAL SVD DECOMPOSITION).
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   FIRST CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), s(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX bd.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    bd_is_upper   = true
    e(1_i4b)   = zero
!
!   SPECIFY THE TYPE OF BIDIAGONAL MATRIX:
!
    mat_type = 9_i4b
!
!   GENERATE THE BIDIAGONAL MATRIX.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           bd IS A GRADED MATRIX.
!
            d(:n)      = geop( one, quarter, n )
            e(2_i4b:n) = d(:n-1_i4b)
!
        case( 2_i4b )
!
!           bd IS A TOEPLITZ MATRIX.
!
            d(:n)      = one
            e(2_i4b:n) = two
!
        case( 3_i4b )
!
            e(2_i4b:n) = one
            do j=1_i4b, n
	        d(j) = real( n + 1_i4b - j, stnd )
            end do
!
        case( 4_i4b )
!
            do j=1_i4b, n-1_i4b
	        d(j)       = real( n + 1_i4b - j, stnd )
                e(j+1_i4b) = d(j)/five
            end do
	    d(n) = one
!
        case( 5_i4b )
!
            do j=1_i4b, (n+1_i4b)/2_i4b
                d(j) = real( j, stnd )
            end do
!
            do j=(n+1_i4b)/2_i4b+1_i4b, n
                d(j) = real( n - j, stnd )
            end do
!
	    do j=1, n-1_i4b
                e(j+1_i4b) = d(j)/five
            end do
!
        case( 6_i4b )
!
            do j=1_i4b, n/2_i4b
                d(2_i4b*j-1_i4b) = real( n + 1_i4b - j, stnd )
                d(2_i4b*j)       = real( j, stnd )
            end do
!
	    do j=1_i4b, n-1_i4b
                e(j+1_i4b) = real( (n-j)/5_i4b, stnd )
	    end do
!
        case( 7_i4b )
!
            d(1_i4b)   = one
            d(2_i4b:n) = c200*epsilon( err )
            e(2_i4b:n) = c200*epsilon( err )
!
        case( 8_i4b )
!
            d(n) = 1e-20
            do j=n-1_i4b, 1_i4b, -1_i4b
	        d(j)       = d(j+1_i4b)*1.01_stnd
                e(j+1_i4b) = d(j)
            end do
!
        case( 9_i4b )
!
            do j=0_i4b, n/11_i4b-1_i4b
!
                do i=1_i4b, 6_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(i-1_i4b), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                do i=7_i4b,10_i4b
                    d(11_i4b*j+i) = real( 1_i4b+10_i4b*(11_i4b-i), stnd )
                    e(11_i4b*j+i+1_i4b) = one
                end do
!
                d(11_i4b*j+11_i4b) = one
!
                if ( 11_i4b*j+12_i4b<=n ) then
                    d(11_i4b*j+12_i4b) = one
                    e(11_i4b*j+12_i4b) = 1e-4
                end if
!
            end do
!
        case default
!
!           bd IS A UNIFORM RANDOM MATRIX.
!
            call random_number( d(:n) )
            call random_number( e(2_i4b:n) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( d ) ) then
!
        if ( .not.all( ieee_is_normal( d ) ) .and. .not.all( ieee_is_normal( e ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input bidiagonal matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-n BIDIAGONAL MATRIX bd
!   IS WRITTEN
!
!                       bd = u * s * v**(t)
!
!   WHERE s IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   n DIAGONAL ELEMENTS, u AND v ARE n-BY-n ORTHOGONAL MATRICES.
!   THE DIAGONAL ELEMENTS OF s ARE THE SINGULAR VALUES OF bd; THEY ARE
!   REAL AND NON-NEGATIVE.
!   THE COLUMNS OF u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF bd AND
!   THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS (E.G. A PARTIAL SVD DECOMPOSITION OF bd)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF bd BY BISECTION
!   WITH SUBROUTINE bd_singval2.
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval2 MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    call bd_singval2( d(:n), e(:n), nsing, s(:n), failure=failure1, sort=sort, abstol=abstol, ls=ls )
!
!   ON EXIT OF bd_singval2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF bd.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY bd_singval2.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter IN THE SECOND STEP.
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF bd BY INVERSE
!   ITERATION WITH SUBROUTINE bd_inviter :
!
    if ( nsing>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(n,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing SINGULAR VECTORS OF a BY maxiter INVERSE ITERATIONS WITH
!       SUBROUTINE bd_inviter .
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), s(:nsing), leftvec(:n,:nsing), rightvec(:n,:nsing),  &
                         failure=failure2, maxiter=maxiter )
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX bd .
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       ON EXIT OF bd_inviter :
!
!          failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!          failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF bd ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF bd, RESPECTIVELY.
!
!       bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 .and. .not.failure1 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(nsing,nsing), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*v - u*singval(:nsing),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF bd .
!
        if ( bd_is_upper ) then
!
            resid(:n,:nsing) = spread( d(:n),         dim=2, ncopies=nsing )*rightvec                 + &
                               eoshift( spread(e(:n), dim=2,ncopies=nsing)*rightvec, shift=1 )        - &
                               spread( s(:nsing),     dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:nsing) = spread( d(:n),         dim=2, ncopies=nsing )*leftvec                   + &
                               eoshift( spread(e(:n), dim=2,ncopies=nsing)*leftvec, shift=1 )          - &
                               spread( s(:nsing),     dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( id(:nsing,1_i4b) )/( sum( s(:nsing) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( id )
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec
!       WHERE rightvec ARE THE RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        resid(:nsing,:nsing) = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( nsing>0  .and. .not.failure1 ) then
        deallocate( d, e, s, leftvec, rightvec )
    else
        deallocate( d, e, s )
    end if
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_singval2() ) = ', failure1
    if ( nsing>0  .and. .not.failure1 ) then
        write (prtunit,*) ' FAILURE ( from bd_inviter() )  = ', failure2
    end if
!
    if ( do_test .and. nsing>0 .and. .not.failure1 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_singval2
! ==============================
!
end program ex1_bd_singval2

ex1_bd_svd.F90

program ex1_bd_svd
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SVD
!   in module SVD_Procedures for computing the full SVD decomposition of
!   a bidiagonal matrix. The decomposition is computed by the QR implicit
!   shift method.                                                                
!
!   The computations are parallelized if OpenMP is used and an highly efficient variant
!   of the Golub-Reinsch bidiagonal QR implicit shift algorithm is used.
!
!
! LATEST REVISION : 14/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, c1_m2, c900, c50, &
                         bd_svd, unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_svd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diag, sup, sup2, singval
    real(stnd), allocatable, dimension(:,:) :: leftvec, rightvec, id, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, bd_is_upper, perfect_shift, bisect, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL BIDIAGONAL MATRIX USING THE BIDIAGONAL QR IMPLICIT SHIFT METHOD
!               WITH A WAVE-FRONT ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE BIDIAGONAL
!               QR ALGORITHM AND, OPTIONALLY, A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS.
!               THE SINGULAR VECTORS ARE RETURNED COLUMNWISE.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    bisect = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( diag(n), sup(n), sup2(n), singval(n),   &
              leftvec(n,n), rightvec(n,n), stat=iok   )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX BD.
!   THE DIAGONAL ELEMENTS ARE STORED IN diag .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup .
!
    bd_is_upper   = true
!
    sup(1_i4b)   = zero
!
!    diag(:n)     = half
!    sup(2_i4b:n) = one
!
!    diag(:n)     = c1_m2
!    sup(2_i4b:n) = c900
!
    call random_number( diag(:n) )
    call random_number( sup(2_i4b:n) )
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    singval(:n) = diag(:n)
    sup2(:n)    = sup(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL SINGULAR VALUES AND VECTORS OF THE BIDIAGONAL MATRIX BD .
!
!   FIST INITALIZED THE LEFT AND RIGHT SINGULAR VECTORS TO THE IDENTITY MATRIX OF ORDER n.
!
    call unit_matrix( leftvec(:n,:n) )
    call unit_matrix( rightvec(:n,:n) )
!
!   bd_svd COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   n-BY-n (UPPER OR LOWER) BIDIAGONAL MATRIX BD. THE SVD IS WRITTEN
!
!                       BD = U * S * V**(t)
!
!   WHERE S IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF BD; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF BD.
!
!   BY DEFAULT, bd_svd USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call bd_svd( bd_is_upper, singval(:n), sup2(:n), failure, leftvec(:n,:n), &
                 rightvec(:n,:n), sort=sort, perfect_shift=perfect_shift,     &
                 bisect=bisect )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES, THE LEFT AND RIGHT
!   SINGULAR VECTORS. THE SINGULAR VECTORS ARE RETURNED COLUMNWISE.
!
!   ON EXIT OF bd_svd :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF BD.
!
!       singval  IS OVERWRITTEN WITH THE SINGULAR VALUES OF BD.
!       leftvec  IS OVERWRITTEN WITH THE LEFT SINGULAR VECTORS OF BD  IF leftvec IS THE IDENTITY ON ENTRY.
!       rightvec IS OVERWRITTEN WITH THE RIGHT SINGULAR VECTORS OF BD IF rightvec IS THE IDENTITY ON ENTRY.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*rightvec - leftvec*singval(:n),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
        if ( bd_is_upper ) then
!
            resid(:n,:n) = spread( diag(:n),        dim=2, ncopies=n )*rightvec                 + &
                           eoshift( spread(sup(:n), dim=2,ncopies=n)*rightvec, shift=1 )        - &
                           spread( singval(:n), dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:n) = spread( diag(:n),        dim=2, ncopies=n )*leftvec                   + &
                           eoshift( spread(sup(:n), dim=2,ncopies=n)*leftvec, shift=1 )          - &
                           spread( singval(:n), dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 = maxval( id(:n,1_i4b) )/( sum( singval(:n) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( id )
!
        resid = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v**(t)*v
!       WHERE v are RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        resid = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( diag, sup, sup2, singval, leftvec, rightvec )
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_svd() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and singular vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_svd
! =========================
!
end program ex1_bd_svd

ex1_bd_svd2.F90

program ex1_bd_svd2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SVD2
!   in module SVD_Procedures for computing the full SVD decomposition of
!   a bidiagonal matrix. The decomposition is computed by the QR implicit
!   shift method.                                                                
!   BD_SVD2 is similar to BD_SVD subroutine, but output the singular vectors
!   in LAPACK style format. This means that the right singular vectors of the
!   bidiagonal matrix are returned rowwise instead of columnwise as in BD_SVD
!   subroutine.
!
!   The computations are parallelized if OpenMP is used and an highly efficient variant
!   of the Golub-Reinsch bidiagonal QR implicit shift algorithm is used.
!
!
! LATEST REVISION : 14/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, c1_m2, c900, c50, &
                         bd_svd2, unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of bd_svd2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diag, sup, sup2, singval
    real(stnd), allocatable, dimension(:,:) :: leftvec, rightvec, id, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, bd_is_upper, perfect_shift, bisect, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL BIDIAGONAL MATRIX USING THE BIDIAGONAL QR IMPLICIT SHIFT METHOD
!               WITH A WAVE-FRONT ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE BIDIAGONAL
!               QR ALGORITHM AND, OPTIONALLY, A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS.
!               THE SINGULAR VECTORS ARE OUTPUT IN LAPACK STYLE FORMAT, MEANING THAT THE RIGHT
!               SINGULAR VECTORS ARE RETURNED ROWWISE INSTEAD OF COLUMNWISE.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    bisect = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( diag(n), sup(n), sup2(n), singval(n),   &
              leftvec(n,n), rightvec(n,n), stat=iok   )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE AN UPPER BIDIAGONAL MATRIX BD.
!   THE DIAGONAL ELEMENTS ARE STORED IN diag .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup .
!
    bd_is_upper   = true
!
    sup(1_i4b)   = zero
!
!    diag(:n)     = half
!    sup(2_i4b:n) = one
!
!    diag(:n)     = c1_m2
!    sup(2_i4b:n) = c900
!
    call random_number( diag(:n) )
    call random_number( sup(2_i4b:n) )
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    singval(:n) = diag(:n)
    sup2(:n)    = sup(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL SINGULAR VALUES AND VECTORS OF THE BIDIAGONAL MATRIX BD .
!
!   FIST INITALIZED THE LEFT AND RIGHT SINGULAR VECTORS TO THE IDENTITY MATRIX OF ORDER n.
!
    call unit_matrix( leftvec(:n,:n) )
    call unit_matrix( rightvec(:n,:n) )
!
!   bd_svd2 COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   n-BY-n (UPPER OR LOWER) BIDIAGONAL MATRIX BD. THE SVD IS WRITTEN
!
!                       BD = U * S * V**(t)
!
!   WHERE S IS AN n-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF BD; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF BD.
!
!   BY DEFAULT, bd_svd2 USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call bd_svd2( bd_is_upper, singval(:n), sup2(:n), failure, leftvec(:n,:n), &
                  rightvec(:n,:n), sort=sort, perfect_shift=perfect_shift,     &
                  bisect=bisect )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES, THE LEFT AND RIGHT
!   SINGULAR VECTORS. THE LEFT SINGULAR VECTORS ARE RETURNED COLUMNWISE,
!   BUT THE RIGHT SINGULAR VECTORS ARE RETURNED ROWWISE. THIS IS THE LAPACK
!   CONVENTION AND IS THE ONLY DIFFERENCE BETWEEN bd_svd2 AND bd_svd SUBROUTINES.
!
!   ON EXIT OF bd_svd2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF BD.
!
!       singval  IS OVERWRITTEN WITH THE SINGULAR VALUES OF BD.
!       leftvec  IS OVERWRITTEN WITH THE LEFT SINGULAR VECTORS OF BD  IF leftvec IS THE IDENTITY ON ENTRY.
!       rightvec IS OVERWRITTEN WITH THE TRANSPOSE OF THE RIGHT SINGULAR VECTORS OF BD IF rightvec IS THE IDENTITY ON ENTRY.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( id(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       TRANSPOSE THE RIGHT SINGULAR VECTORS SO THAT THEY ARE STORED COLUMNWISE.
!
        resid(:n,:n) = transpose( rightvec(:n,:n) )
        rightvec(:n,:n) = resid(:n,:n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION bd*rightvec - leftvec*singval(:n),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
        if ( bd_is_upper ) then
!
            resid(:n,:n) = spread( diag(:n),        dim=2, ncopies=n )*rightvec                 + &
                           eoshift( spread(sup(:n), dim=2,ncopies=n)*rightvec, shift=1 )        - &
                           spread( singval(:n), dim=1, ncopies=n )*leftvec 

        else
!
            resid(:n,:n) = spread( diag(:n),        dim=2, ncopies=n )*leftvec                   + &
                           eoshift( spread(sup(:n), dim=2,ncopies=n)*leftvec, shift=1 )          - &
                           spread( singval(:n), dim=1, ncopies=n )*rightvec
!
        end if
!
        id(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 = maxval( id(:n,1_i4b) )/( sum( singval(:n) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        call unit_matrix( id )
!
        resid = abs( id - matmul( transpose( leftvec ), leftvec ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v**(t)*v
!       WHERE v are RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX bd.
!
        resid = abs( id - matmul( transpose( rightvec ), rightvec ) )
        err3 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( diag, sup, sup2, singval, leftvec, rightvec )
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_svd2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and singular vectors of a ', &
       n, ' by ', n,' real bidiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_bd_svd2
! ==========================
!
end program ex1_bd_svd2

ex1_chol_cmp.F90

program ex1_chol_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines CHOL_CMP and CHOL_SOLVE
!   in module Lin_Procedures for computing a Cholesky decomposition of a real symmetric
!   positive-definite matrix and solving a linear system with such matrix as a coefficient
!   matrix.
!
!
! LATEST REVISION : 12/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, half, safmin, true, false, chol_cmp,   &
                         chol_solve, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE POSITIVE
! DEFINITE MATRIX. m SHOULD BE GREATER OR EQUAL TO n.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=n+10
!
    character(len=*), parameter :: name_proc='Example 1 of chol_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, c
    real(stnd), dimension(:),   allocatable :: invdiag, b, x, res
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n SYMMETRIC DEFINITE POSITIVE MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE CHOLESKY DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), b(n), x(n), invdiag(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM SYMMETRIC POSITIVE DEFINITE MATRIX a .
!
    call random_number( c )
!
    c = c - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        c = c/tmp
    end if
!
    a = matmul( transpose(c), c )
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR SYMMETRIC POSITIVE DEFINITE
!   SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE CHOLESKY DECOMPOSITION OF MATRIX a .
!   IF ON OUTPUT OF chol_cmp d1 IS DIFFERENT FROM ZERO
!   THEN THE SYMMETRIC LINEAR SYSTEM IS NOT SINGULAR
!   AND CAN BE SOLVED BY SUBROUTINE chol_solve.
!
    call chol_cmp( a, invdiag, d1, upper=upper )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT IN chol_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of CHOL_CMP subroutine, d1=', d1
        write (prtunit,*)
!
    else
!
!       SOLVE THE SYMMETRIC LINEAR SYSTEM.
!
        call chol_solve( a, invdiag, b, upper=upper )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1/=zero .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, x, invdiag, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, x, invdiag )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for solving a real symmetric positive definite linear system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_chol_cmp
! ===========================
!
end program ex1_chol_cmp

ex1_chol_cmp2.F90

program ex1_chol_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine CHOL_CMP2
!   in module Lin_Procedures for computing a Cholesky decomposition of a real
!   symmetric positive-definite matrix and the inverse of such matrix.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, one, c10, true, false,  &
                         chol_cmp2, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE POSITIVE
! DEFINITE MATRIX. m SHOULD BE GREATER OR EQUAL TO n.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=n+10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of chol_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, c
    real(stnd), dimension(:),   allocatable :: invdiag
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE INVERSE OF A REAL n-BY-n SYMMETRIC POSITIVE DEFINITE
!               MATRIX a BY USING THE CHOLESKY DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), ainv(n,n), invdiag(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM SYMMETRIC POSITIVE DEFINITE MATRIX a .
!
    call random_number( c )
!
    a = matmul( transpose(c), c )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE SYMMETRIC POSITIVE DEFINITE MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE INVERSE OF A SYMMETRIC POSITIVE DEFINITE
!   MATRIX a BY USING THE CHOLESKY DECOMPOSITION OF a.
!
!   IF ON OUTPUT OF chol_cmp2 d1 IS DIFFERENT FROM ZERO
!   THEN THE SYMMETRIC MATRIX IS NOT SINGULAR AND THE
!   SYMMETRIC INVERSE OF a HAS BEEN COMPUTED.
!
    call chol_cmp2( a, invdiag, d1, matinv=ainv, upper=upper, fill=true )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT IN chol_cmp2 SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to CHOL_CMP2 subroutine, d1=', d1
!
    else if ( do_test ) then
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        c(:n,:n) = matmul( a2, ainv )
!
        do j = 1_i4b, n
!
            c(j,j) = c(j,j) - one
!
        end do
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        err = norm( c(:n,:n) ) /( real(n,stnd)*norm(a2(:n,:n)) )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        deallocate( a, ainv, c, invdiag, a2 )
    else
        deallocate( a, ainv, c, invdiag )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a positive definite symmetric matrix of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_chol_cmp2
! ============================
!
end program ex1_chol_cmp2

ex1_comp_cor.F90

program ex1_comp_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_COR
!   in module Mul_Stat_Procedures for computing univariate statistics and correlation
!   coefficients between a tri-dimensional array and a one-dimensional array.
!
!   All the statistics and correlations are computed with only one-pass on the data
!   in one or several steps with a very efficient algorithm for large datasets, which
!   also allows out-of-core computations.
!
!
! LATEST REVISION : 29/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=50
!
    character(len=*), parameter :: name_proc='Example 1 of comp_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                      :: err_xstat, err_ystat, err_cor, xyn, eps
    real(stnd), dimension(n,m)      :: xycor1, xycor2
    real(stnd), dimension(2)        :: ystat1, ystat2
    real(stnd), dimension(n,m,2)    :: xstat1, xstat2
    real(stnd), dimension(n,m,p)    :: x
    real(stnd), dimension(p)        :: y
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM TRIDIMENSIONAL OBSERVATION ARRAY x .
!
    call random_number( x(:n,:m,:p) )
!
!   GENERATE A RANDOM OBSERVATION VECTOR y .
!
    call random_number( y(:p) )
!
!   FIRST COMPUTE THE CORRELATIONS BETWEEN ARRAYS x AND y
!   FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor( x(:n,:m,:p), y(:p), first, last,                    &
                   xstat1(:n,:m,:2), ystat1(:2), xycor1(:n,:m), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat1(i,j,1) CONTAINS THE MEAN VALUES OF THE ARRAY SECTION x(i,j,:p).
!
!      xstat1(i,j,2) CONTAINS THE VARIANCES OF THE ARRAY SECTION x(i,j,:p).
!
!      ystat1(1)     CONTAINS THE MEAN VALUE OF THE DATA VECTOR y(:p).
!
!      ystat1(2)     CONTAINS THE VARIANCE OF THE DATA VECTOR y(:p).
!
!      xycor1(i,j)   CONTAINS THE CORRELATION COEFFICIENT
!                    BETWEEN x(i,j,:p) AND y(:p).
!
!      xyn           CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY
!                    x(:n,:m,:p) AND THE DATA VECTOR y(:p) (xyn=real(p,stnd) ).
!
!   SECOND RECOMPUTE CORRELATIONS BETWEEN ARRAYS x AND y
!   ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cor( x(:n,:m,i:i), y(i:i), first, last,                 &
                       xstat2(:n,:m,:2), ystat2(:2), xycor2(:n,:m), xyn )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_xstat = maxval( abs( ( xstat1-xstat2)/xstat1 ) )
    err_ystat = maxval( abs( ( ystat1-ystat2)/ystat1 ) )
    err_cor   = maxval( abs( xycor1-xycor2 ) )
!
    if ( max(err_xstat, err_ystat, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_cor
! ===========================
!
end program ex1_comp_cor

ex1_comp_cor_miss.F90

program ex1_comp_cor_miss
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_COR_MISS
!   in module Mul_Stat_Procedures for computing univariate statistics and correlation
!   coefficients between a tri-dimensional array and a one-dimensional array, both
!   with missing values.
!
!   All the statistics and correlations are computed with only one-pass on the data
!   in one or several steps with a very efficient algorithm for large datasets, which
!   also allows out-of-core computations.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor_miss
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=50
!
! miss IS THE MISSING INDICATOR IN THE DATA.
!
    real(stnd), parameter :: miss=-999.99_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of comp_cor_miss'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                      :: err_xstat, err_ystat, err_cor, eps
    real(stnd), dimension(n,m,4)    :: xycor1, xycor2
    real(stnd), dimension(4)        :: ystat1, ystat2
    real(stnd), dimension(n,m,4)    :: xstat1, xstat2
    real(stnd), dimension(n,m,p)    :: x
    real(stnd), dimension(p)        :: y
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM TRI-DIMENSIONAL OBSERVATION ARRAY x WITH MISSING VALUES.
!
    call random_number( x(:n,:m,:p) )
    where ( x(:n,:m,:p)<=0.05_stnd ) x(:n,:m,:p) = miss
!
!   GENERATE A RANDOM OBSERVATION VECTOR y WITH MISSING VALUES.
!
    call random_number( y(:p) )
    where ( y(:p)<=0.05_stnd ) y(:p) = miss
!
!   FIRST COMPUTE THE CORRELATIONS BETWEEN ARRAYS x AND y
!   FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor_miss( x(:n,:m,:p), y(:p), first, last, xstat1(:n,:m,:4),   &
                        ystat1(:4), xycor1(:n,:m,:4), xymiss=miss )
!
!   ON EXIT OF COMP_COR_MISS WHEN last=true :
!
!      xstat1(i,j,1) CONTAINS THE MEAN VALUES OF THE ARRAY SECTION x(i,j,:p).
!
!      xstat1(i,j,2) CONTAINS THE VARIANCES OF THE ARRAY SECTION x(i,j,:p).
!
!      xstat1(i,j,3) CONTAINS THE NUMBER OF NON-MISSING OBSERVATIONS
!                    IN THE ARRAY SECTION x(i,j,:p).
!
!      ystat1(1)     CONTAINS THE MEAN VALUE OF THE DATA VECTOR y(:p).
!
!      ystat1(2)     CONTAINS THE VARIANCE OF THE DATA VECTOR y(:p).
!
!      ystat1(3)     CONTAINS THE NUMBER OF NON-MISSING OBSERVATIONS
!                    IN THE DATA VECTOR y(:p).
!
!      xycor1(i,j,1) CONTAINS THE CORRELATION COEFFICIENT BETWEEN x(i,j,:p) AND y(:p)
!                    COMPUTED WITH THE ABOVE UNIVARIATE STATISTICS.
!
!      xycor1(i,j,2) CONTAINS THE INCIDENCE VALUE BETWEEN x(i,j,:p) AND y(:p).
!                    xycor1(i,j,2) INDICATES THE NUMBER OF VALID PAIRS OF OBSERVATIONS
!                    WHICH WHERE USED IN THE CALCULATION OF xycor1(i,j,1) .
!
!   xstat1(:,:,4), ystat1(4) AND xycor1(:,:,3:4) ARE USED AS WORKSPACE AND CONTAIN NO USEFUL
!   INFORMATION ON OUTPUT OF comp_cor_miss.
!
!   SECOND RECOMPUTE CORRELATIONS BETWEEN ARRAYS x AND y
!   ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cor_miss( x(:n,:m,i:i), y(i:i), first, last, xstat2(:n,:m,:4),       &
                            ystat2(:4), xycor2(:n,:m,:4), xymiss=miss )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AND CORRELATIONS AGREE.
!
    err_xstat = maxval( abs( ( xstat1(:n,:m,:3)-xstat2(:n,:m,:3))/xstat1(:n,:m,:3) ) )
    err_ystat = maxval( abs( ( ystat1(:3)-ystat2(:3))/ystat1(:3) ) )
    err_cor   = maxval( abs( xycor1(:n,:m,:2)-xycor2(:n,:m,:2) ) )
!
    if ( max(err_xstat, err_ystat, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_cor_miss
! ================================
!
end program ex1_comp_cor_miss

ex1_comp_cor_miss2.F90

program ex1_comp_cor_miss2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_COR_MISS2
!   in module Mul_Stat_Procedures for computing univariate statistics and correlation
!   coefficients between a tri-dimensional array and a one-dimensional array, both
!   with missing values.
!
!   All the statistics and correlations are computed with only one-pass on the data
!   in one or several steps with a very efficient algorithm for large datasets, which
!   also allows out-of-core computations.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor_miss2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=50
!
! miss IS THE MISSING INDICATOR.
!
    real(stnd), parameter :: miss=-999.99_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of comp_cor_miss2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                    :: err_xstat, err_ystat, err_cor, eps
    real(stnd), dimension(n,m)    :: xycor1, xycor2, xyn
    real(stnd), dimension(n,m,2)  :: xstat1, xstat2, ystat1, ystat2
    real(stnd), dimension(n,m,p)  :: x
    real(stnd), dimension(p)      :: y
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM TRIDIMENSIONAL OBSERVATION ARRAY x WITH MISSING VALUES.
!
    call random_number( x(:n,:m,:p) )
    where ( x(:n,:m,:p)<=0.05_stnd ) x(:n,:m,:p) = miss
!
!   GENERATE A RANDOM OBSERVATION VECTOR y WITH MISSING VALUES.
!
    call random_number( y(:p) )
    where ( y(:p)<=0.05_stnd ) y(:p) = miss
!
!   FIRST COMPUTE THE CORRELATIONS BETWEEN ARRAYS x AND y
!   FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor_miss2( x(:n,:m,:p), y(:p), first, last, xstat1(:n,:m,:2),         &
                        ystat1(:n,:m,:2), xycor1(:n,:m), xyn(:n,:m), xymiss=miss )
!
!   ON EXIT OF COMP_COR_MISS2 WHEN last=true :
!
!      xstat1(i,j,1) CONTAINS THE MEAN VALUES OF THE ARRAY SECTION x(i,j,:p),
!                    COMPUTED FROM VALID PAIRS OF OBSERVATIONS FOR  x(i,j,:p) AND y(:p).
!
!      xstat1(i,j,2) CONTAINS THE VARIANCES OF THE ARRAY SECTION x(i,j,:p),
!                    COMPUTED FROM VALID PAIRS OF OBSERVATIONS FOR  x(i,j,:p) AND y(:p).
!
!      ystat1(i,j,1) CONTAINS THE MEAN VALUE OF THE DATA VECTOR y(:p),
!                    COMPUTED FROM VALID PAIRS OF OBSERVATIONS FOR  x(i,j,:p) AND y(:p).
!
!      ystat1(i,j,2) CONTAINS THE VARIANCE OF THE DATA VECTOR y(:p),
!                    COMPUTED FROM VALID PAIRS OF OBSERVATIONS FOR  x(i,j,:p) AND y(:p).
!
!      xycor1(i,j)   CONTAINS THE CORRELATION COEFFICIENT BETWEEN x(i,j,:p) AND y(:p)
!                    COMPUTED WITH THE ABOVE UNIVARIATE STATISTICS.
!
!      xyn(i,j)      CONTAINS THE INCIDENCE VALUE BETWEEN x(i,j,:p) AND y(:p).
!                    xycor1(i,j) INDICATES THE NUMBER OF VALID PAIRS OF OBSERVATIONS
!                    WHICH WHERE USED IN THE CALCULATION OF ALL THE ABOVE STATISTICS .
!
!
!   SECOND RECOMPUTE CORRELATIONS BETWEEN ARRAYS x AND y
!   ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cor_miss2( x(:n,:m,i:i), y(i:i), first, last, xstat2(:n,:m,:2),        &
                            ystat2(:n,:m,:2), xycor2(:n,:m), xyn(:n,:m), xymiss=miss )
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AND CORRELATIONS AGREE.
!
    err_xstat = maxval( abs( ( xstat1(:n,:m,:2)-xstat2(:n,:m,:2))/xstat1(:n,:m,:2) ) )
    err_ystat = maxval( abs( ( ystat1(:n,:m,:2)-ystat2(:n,:m,:2))/ystat1(:n,:m,:2) ) )
    err_cor   = maxval( abs( xycor1(:n,:m)-xycor2(:n,:m) ) )
!
    if ( max(err_xstat, err_ystat, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_cor_miss2
! =================================
!
end program ex1_comp_cor_miss2

ex1_comp_cormat.F90

program ex1_comp_cormat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_CORMAT
!   in module Mul_Stat_Procedures for computing a correlation matrix from
!   a dataset.
!
!   The correlation matrix is computed with only one-pass on the dataset
!   in one or several steps with a very efficient algorithm for large datasets,
!   which also allows out-of-core computations.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, comp_cormat
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=500
!
    character(len=*), parameter :: name_proc='Example 1 of comp_cormat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                 :: err_mean, err_std, err_cor, eps, xn
    real(stnd), dimension(m,m) :: cor1, cor2
    real(stnd), dimension(m,p) :: x
    real(stnd), dimension(m)   :: mean1, mean2, std1, std2
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last, cov, fill
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x .
!
    call random_number( x )
!
!   IF OPTIONAL ARGUMENT cov IS SET TO TRUE, comp_cormat WILL COMPUTE A
!   VARIANCES-COVARIANCES MATRIX INSTEAD OF A CORRELATION MATRIX ON EXIT.
!   BY DEFAULT, A CORRELATION MATRIX IS COMPUTED.
!
    cov  = false
!
!   IF OPTIONAL ARGUMENT fill IS SET TO TRUE, comp_cormat WILL FILL
!   THE LOWER TRIANGLE OF THE VARIANCES-COVARIANCES OR CORRELATION MATRIX
!   ON EXIT. BY DEFAULT, THE LOWER TRIANGLE IS NOT FILLED.
!
    fill = true
!
!   FIRST COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cormat( x(:m,:p), first, last, mean1(:m), cor1(:m,:m), xn,    &
                      xstd=std1(:m), cov=cov, fill=fill )
!
!   ON EXIT, WHEN last=true :
!
!     mean1(:m) CONTAINS THE VARIABLE MEANS COMPUTED FROM ALL OBSERVATIONS 
!     IN THE DATA MATRIX x .
!
!     cor1(:m,:m) CONTAINS THE UPPER TRIANGLE OF THE SYMETRIC CORRELATION
!     OR VARIANCE-COVARIANCE MATRIX AS CONTROLLED BY THE cov ARGUMENT.
!     IF THE OPTIONAL ARGUMENT fill IS PRESENT AND EQUAL TO true,
!     THE LOWER TRIANGLE OF cor1 IS ALSO FILLED. OTHERWISE, THE LOWER TRIANGLE
!     OF cor1 IS NOT MODIFIED.
!
!     xn INDICATES THE NUMBERS OF OBSERVATIONS WHICH WERE 
!     USED IN THE CALCULATION OF cor1 .
!
!     IF THE OPTIONAL ARGUMENT xstd IS PRESENT, xstd CONTAINS THE VARIABLE 
!     STANDARD-DEVIATIONS.
!
!   SECOND RECOMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x, ITERATIVELY  FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cormat( x(:m,i:i), first, last, mean2(:m), cor2(:m,:m), xn,    &
                         xstd=std2(:m), cov=cov, fill=fill )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AND CORRELATION MATRICES AGREE.
!
    err_mean = maxval( abs( ( mean1-mean2)/mean1 ) )
    err_std  = maxval( abs( ( std1-std2)/std1    ) )
    err_cor  = maxval( abs( cor1-cor2 ) )
!
    if ( max(err_mean, err_std, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_cormat
! ==============================
!
end program ex1_comp_cormat

ex1_comp_cormat_miss.F90

program ex1_comp_cormat_miss
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_CORMAT_MISS
!   in module Mul_Stat_Procedures for computing a correlation matrix from
!   a dataset with missing values.
!
!   The correlation matrix is computed with only one-pass on the dataset
!   in one or several steps with a very efficient algorithm for large datasets,
!   which also allows out-of-core computations.
!
!
! LATEST REVISION : 29/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, comp_cormat_miss
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=500, n=(m*(m+1))/2
!
! miss IS THE MISSING INDICATOR.
!
    real(stnd), parameter :: miss=-999.99_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of comp_cormat_miss'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                 :: err_mean, err_std, err_cor, eps
    real(stnd), dimension(m,m) :: cor1, cor2
    real(stnd), dimension(m,p) :: x
    real(stnd), dimension(n,3) :: xn
    real(stnd), dimension(m,2) :: mean1, mean2
    real(stnd), dimension(m)   :: std1, std2
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last, cov, fill
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x WITH MISSING VALUES.
!
    call random_number( x(:m,:p) )
    where ( x(:m,:p)<=0.05_stnd ) x(:m,:p) = miss
!
!   IF OPTIONAL ARGUMENT cov IS SET TO TRUE, comp_cormat_miss WILL COMPUTE A
!   VARIANCES-COVARIANCES MATRIX INSTEAD OF A CORRELATION MATRIX ON EXIT.
!   BY DEFAULT, A CORRELATION MATRIX IS COMPUTED.
!
    cov  = false
!
!   IF OPTIONAL ARGUMENT fill IS SET TO TRUE, comp_cormat_miss WILL FILL
!   THE LOWER TRIANGLE OF THE VARIANCES-COVARIANCES OR CORRELATION MATRIX
!   ON EXIT. BY DEFAULT, THE LOWER TRIANGLE IS NOT FILLED.
!
    fill = true
!
!   FIRST COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cormat_miss( x(:m,:p), first, last, mean1(:m,:2), cor1(:m,:m), xn(:n,:3), miss,   &
                           xstd=std1(:m), cov=cov, fill=fill )
!
!   ON EXIT, WHEN last=true :
!
!     mean1(:m,1) CONTAINS THE VARIABLE MEANS COMPUTED FROM ALL NON-MISSING OBSERVATIONS 
!     IN THE DATA MATRIX x. mean1(:m,2) IS USED AS WORKSPACE.
!
!     cor1(:m,:m) CONTAINS THE UPPER TRIANGLE OF THE SYMETRIC CORRELATION
!     OR VARIANCE-COVARIANCE MATRIX AS CONTROLLED BY THE cov ARGUMENT.
!     IF THE OPTIONAL ARGUMENT fill IS PRESENT AND EQUAL TO true,
!     THE LOWER TRIANGLE OF cor1 IS ALSO FILLED. OTHERWISE, THE LOWER TRIANGLE
!     OF cor1 IS NOT MODIFIED.
!
!     xn(:n,1) CONTAINS THE UPPER TRIANGLE OF THE MATRIX OF THE INCIDENCE VALUES
!     BETWEEN EACH PAIR OF VARIABLES, PACKED COLUMNWISE, IN A LINEAR ARRAY. 
!     xn(i + (j-1)*j/2,1) INDICATES THE NUMBERS OF NON-MISSING PAIRS WHICH WERE 
!     USED IN THE CALCULATION OF cor1(i,j) for 1<=i<=j .
!     xn(:n,2:3) IS USED AS WORKSPACE.
!
!     IF THE OPTIONAL ARGUMENT xstd IS PRESENT, xstd CONTAINS THE VARIABLE 
!     STANDARD-DEVIATIONS COMPUTED FROM ALL NON-MISSING OBSERVATIONS.
!
!   SECOND COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x, ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cormat_miss( x(:m,i:i), first, last, mean2(:m,:2), cor2(:m,:m), xn(:n,:3), miss,  &
                               xstd=std2(:m), cov=cov, fill=fill )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_mean = maxval( abs( ( mean1(:m,1)-mean2(:m,1))/mean1(:m,1) ) )
    err_std  = maxval( abs( ( std1(:m)-std2(:m))/std1(:m)    ) )
    err_cor  = maxval( abs( cor1(:m,:m)-cor2(:m,:m) ) )
!
    if ( max(err_mean, err_std, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_cormat_miss
! ===================================
!
end program ex1_comp_cormat_miss

ex1_comp_det.F90

program ex1_comp_det
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_DET
!   in module Lin_Procedures for computing the determinant of a real square
!   matrix using a LU decomposition.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, true, false, zero, one, comp_det, inv, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SQUARE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=500
!
    character(len=*), parameter :: name_proc='Example 1 of comp_det'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: adet, ainvdet, err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ainv
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE DETERMINANT OF A REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( ainv(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE MATRIX INVERSE OF a.
!
        ainv = inv( a ) 
!
!       COMPUTE THE DETERMINANT OF MATRIX INVERSE .
!
        call comp_det( ainv, ainvdet )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( ainv )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE DETERMINANT OF THE DATA MATRIX WITH SUBROUTINE comp_det.
!
    call comp_det( a, adet )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK det(a**-1)*det(a)**-1 = 1.
!
        err = abs(adet*ainvdet - one) / max( abs(adet), abs(ainvdet) )
!
    end if
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( a )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the determinant of a real matrix of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_det
! ===========================
!
end program ex1_comp_det

ex1_comp_eof.F90

program ex1_comp_eof
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF
!   and COMP_PC_EOF in module Mul_Stat_Procedures for performing a Principal
!   Component Analysis on a dataset.
!
!   COMP_EOF computes the means and the covariance (or correlation) matrix with only
!   one pass through the data.
!
!   COMP_EOF then computes all eigenvalues and eigenvectors of the covariance or
!   correlation matrix. If you are interested only in the leading eigenvectors,
!   you should use COMP_EOF2 subroutine instead of COMP_EOF.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_eof, comp_pc_eof
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=50
!
    character(len=*), parameter :: name_proc='Example 1 of comp_eof'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                  :: err_pc, xn
    real(stnd), dimension(m,m)  :: eigvec
    real(stnd), dimension(m)    :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(m,p)  :: x, x_std
    real(stnd), dimension(p,m)  :: pc
!
    logical(lgl) :: first, last, failure
!
    character    :: sort = 'd'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   FIRST COMPUTE EIGENVECTORS (E.G. EOFs) OF THE CORRELATION MATRIX.
!
    first = true
    last  = true
!
    call comp_eof( x(:m,:p), first, last, eigval(:m), eigvec(:m,:m), xn, failure,   &
                   sort=sort, xmean=mean(:m), xstd=std(:m), xeigvar=eigvar(:m)      )
!
!   ON EXIT OF COMP_EOF WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      eigvec(:m,:m)    CONTAINS THE EIGENVECTORS STORED COLUMNWISE IN THE ORDER
!                       OF THE EIGENVALUES STORED IN eigval.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVECTORS AND THE
!                       EIGENVALUES.
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!
!   SECOND COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one .
!
    singval(:) = one
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:m), singval(:m), pc(:p,:m),   &
                      xmean=mean(:m), xstd=std(:m)                       )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:m) CONTAINS THE PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**t*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs(matmul(transpose(x_std),eigvec)-pc) )/sum( eigval )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err_pc<=sqrt( epsilon(err_pc) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_eof
! ===========================
!
end program ex1_comp_eof

ex1_comp_eof2.F90

program ex1_comp_eof2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF2
!   and COMP_PC_EOF in module Mul_Stat_Procedures for performing a Principal
!   Component Analysis on a dataset.
!
!   COMP_EOF2 computes the means and the covariance (or correlation) matrix with only
!   one pass through the data.
!
!   COMP_EOF2 then computes all eigenvalues and only the leading eigenvectors of the covariance
!   or correlation matrix by inverse iteration.
!
!
! LATEST REVISION : 29/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_eof2, comp_pc_eof
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
! neig IS THE NUMBER OF THE LEADING EIGENVECTORS WHICH MUST BE COMPUTED AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THESE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=50, neig=3, maxiter=2, mm=(m*(m+1))/2
!
    character(len=*), parameter :: name_proc='Example 1 of comp_eof2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                    :: err_pc, xn
    real(stnd), dimension(m)      :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(mm)     :: corp
    real(stnd), dimension(m,p)    :: x, x_std
    real(stnd), dimension(m,neig) :: eigvec
    real(stnd), dimension(p,neig) :: pc
!
    logical(lgl) :: first, last, failure
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   FIRST COMPUTE neig EOFs FROM THE CORRELATION MATRIX BY INVERSE ITERATION.
!
    first = true
    last  = true
!
    call comp_eof2( x(:m,:p), first, last, eigval(:m), corp(:mm), xn, failure,   &
                    maxiter=maxiter, xmean=mean(:m), xstd=std(:m),               &
                    xeigvar=eigvar(:m), xeigvec=eigvec(:m,:neig)                 )
!
!   ON EXIT OF COMP_EOF2 WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVALUES OR THAT
!                       SOME EIGENVECTORS FAILED TO CONVERGE WITH maxiter
!                       INVERSE ITERATIONS.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!      eigvec(:m,:neig) CONTAINS THE FIRST neig EIGENVECTORS STORED COLUMNWISE
!                       IN THE ORDER OF THE EIGENVALUES STORED IN eigval.
!
!
!   SECOND COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one .
!
    singval(:) = one
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:neig), singval(:neig), pc(:p,:neig),   &
                      xmean=mean(:m), xstd=std(:m)                                )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:neig) CONTAINS THE FIRST neig PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**t*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs(matmul(transpose(x_std),eigvec)-pc) )/sum( eigval(:neig) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err_pc<=sqrt( epsilon(err_pc) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_eof2
! ============================
!
end program ex1_comp_eof2

ex1_comp_filt_rot_pc.F90

program ex1_comp_filt_rot_pc
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF,
!   COMP_PC_EOF and COMP_FILT_ROT_PC in module Mul_Stat_Procedures for
!   rotating a Principal Component Analysis model towards specific frequency modes.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_eof, comp_pc_eof, &
                         comp_filt_rot_pc, norm
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, p IS THE NUMBER OF OBSERVATIONS, m THE NUMBER OF VARIABLES
! AND nrot IS THE NUMBER OF EIGENVECTORS OR PCS TO ROTATE.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=100, nrot=5
!
    character(len=*), parameter :: name_proc='Example 1 of comp_filt_rot_pc'
!
    character, parameter :: sort = 'd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: eps, err_pc, err_rot1, err_rot2, err_rot3, err_rot4, xn
    real(stnd), dimension(m,m)         :: eigvec
    real(stnd), dimension(m,nrot)      :: factor, rot_factor
    real(stnd), dimension(nrot,nrot)   :: rot
    real(stnd), dimension(m)           :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(nrot)        :: std_rot_pc
    real(stnd), dimension(m,p)         :: x, x_std
    real(stnd), dimension(p,m)         :: pc
    real(stnd), dimension(p,nrot)      :: rot_pc
!
    integer(i4b) :: pl, ph
!
    logical(lgl) :: first, last, cov, failure, failure2
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PERFORM A PRINCIPAL COMPONENT (E.G., EOF) ANALYSIS AND AN
!               ORTHOGONAL ROTATION OF THE PRINCIPAL COMPONENT TIME SERIES
!               TOWARDS A SPECIFIC FREQUENCY BAND.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   COMPUTE EOFs FROM THE CORRELATION MATRIX.
!
    cov = false
!
    first = true
    last  = true
!
    call comp_eof( x(:m,:p), first, last, eigval(:m), eigvec(:m,:m), xn, failure,       &
                   cov=cov, sort=sort, xmean=mean(:m), xstd=std(:m), xeigvar=eigvar(:m) )
!
!   ON EXIT OF COMP_EOF WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      eigvec(:m,:m)    CONTAINS THE EIGENVECTORS STORED COLUMNWISE IN THE ORDER
!                       OF THE EIGENVALUES STORED IN eigval.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVECTORS AND THE
!                       EIGENVALUES.
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!
!   COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one . sqrt(eigval(:m)) GIVES THE STANDARD-DEVIATIONS
!   ACCOUNTED FOR BY THE PC time series.
!
    singval(:m) = sqrt( eigval(:m) )
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:m), singval(:m), pc(:p,:m),   &
                      xmean=mean(:m), xstd=std(:m)                       )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:m) CONTAINS THE (STANDARDIZED) PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!   COMPUTE THE FIRST nrot CORRESPONDING FACTORS FROM THE EIGENVECTORS AND THE SINGULAR VALUES.
!
    factor(:m,:nrot) = eigvec(:m,:nrot)*spread( singval(:nrot), dim=1, ncopies=m )
!
!   NOW ROTATE THE FIRST nrot PC TIME SERIES WITH SUBROUTINE comp_filt_rot_pc.
!
!   SPECIFY THE WINDOWED FILTER TO BE USED FOR THE ORTHOGONAL ROTATION.
!
    pl = 0
    ph = 24
!
    call comp_filt_rot_pc( pc(:p,:nrot), singval(:nrot), pl, ph, rot_pc(:p,:nrot),  &
                           rot(:nrot,:nrot), std_rot_pc(:nrot), failure2 )
!
!   NOW ROTATE THE FIRST nrot FACTORS.
!
    rot_factor(:m,:nrot) = matmul( factor(:m,:nrot), rot(:nrot,:nrot) )
!
!   RESCALE THE ORIGINAL PC TIME SERIES FOR THE TESTS.
!
    pc(:p,:m) = pc(:p,:m)*spread( singval(:m), dim=1, ncopies=p )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**(t)*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs( matmul(transpose(x_std),eigvec) - pc ) )
!
!   CHECK THAT THE VARIANCE EXPLAINED BY THE ORIGINAL AND ROTATED EOFS IS THE SAME.
!
    err_rot1 = abs( sum( std_rot_pc(:nrot)**2 ) - sum( eigval(:nrot) ) )
!
!   CHECK ORTHOGONALITY OF THE ROTATION MATRIX rot COMPUTED BY comp_filt_rot_eof
!   SUBROUTINE.
!
    err_rot2 = abs( sum( abs( matmul(transpose(rot),rot) ) ) - real(nrot,stnd) )
!
!   CHECK THAT THE NORMS OF THE ROTATED PCS ARE UNCHANGED.
!
    err_rot3 =  maxval( abs( norm( rot_pc(:p,:nrot), dim=2 ) - sqrt( real(p,stnd) ) ) )
!
!   CHECK THE COMPUTATION OF VARIANCES EXPLAINED BY THE ROTATED PCS.
!
    err_rot4 = maxval( abs( std_rot_pc(:nrot) - norm( rot_factor(:m,:nrot), dim=2 ) ) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_pc,err_rot1,err_rot2,err_rot3,err_rot4)<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_filt_rot_pc
! ===================================
!
end program ex1_comp_filt_rot_pc

ex1_comp_ginv.F90

program ex1_comp_ginv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_GINV
!   in module SVD_Procedures for computing a generalized inverse of a real matrix.
!                                                                              
!   The generalized inverse is computed with the SVD of the real matrix and the
!   computations are parallelized if OpenMP is used.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, comp_ginv, norm, &
                         c10, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=2000, k=min(m,n)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of comp_ginv'
!   
!   
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: eps, anorm, err, err1, err2, err3, err4, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, ainv2, a_by_ainv, ainv_by_a
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: failure, do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTING THE GENERALIZED INVERSE OF A m-BY-n REAL MATRIX USING
!               THE SINGULAR VALUE DECOMPOSITION (SVD) OF THE MATRIX. THE SVD IS COMPUTED
!               BY THE BIDIAGONAL QR IMPLICIT METHOD WITH A WAVE-FRONT ALGORITHM FOR
!               APPLYING GIVENS ROTATIONS IN THE BIDIAGONAL QR ALGORITHM AND,
!               OPTIONALLY, A PERFECT SHIFT FOR THE SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*sqrt( epsilon(eps) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), ainv(n,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), ainv2(n,m), a_by_ainv(m,m),   &
                  ainv_by_a(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
!       COMPUTE THE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE GENERALIZED INVERSE OF a(:m,:n) WITH SUBROUTINE comp_ginv.
!   THE GENERALIZED INVERSE IS COMPUTED WITH THE HELP OF THE SINGULAR
!   VALUE DECOMPOSITION (SVD) OF a(:m,:n).
!
    call comp_ginv( a, failure, ainv )
!
!   THE ROUTINE RETURNS THE GENERALIZED INVERSE OF a(:m,:n).
!
!   ON EXIT OF comp_ginv :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       COMPUTE ainv*a AND a*ainv*a .
!
        ainv_by_a = matmul( ainv, a2 )
        a = matmul( a2, ainv_by_a )
!
!       COMPUTE a*ainv AND ainv*a*ainv .
!
        a_by_ainv = matmul( a2, ainv )
        ainv2 = matmul( ainv, a_by_ainv )
!
!       CHECK THE Moore-Penrose EQUATIONS :
!
!             a*ainv*a = a            (1)
!          ainv*a*ainv = ainv         (2)
!            (a*ainv)' = a*ainv       (3)
!            (ainv*a)' = ainv*a       (4)
!
        err1 = norm( a - a2 )
        err2 = norm( ainv - ainv2 )
        err3 = norm( a_by_ainv - transpose(a_by_ainv) )
        err4 = norm( ainv_by_a - transpose(ainv_by_a) )
!
        err = max( err1, err2, err3, err4 )/ ( real(k,stnd)*anorm )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, ainv2, a_by_ainv, ainv_by_a )        
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ainv )
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from comp_ginv() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the generalized inverse of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_ginv
! ============================
!
end program ex1_comp_ginv

ex1_comp_inv.F90

program ex1_comp_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_INV
!   in module Lin_Procedures for computing the inverse of a real matrix.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    USE Statpack, ONLY : lgl, i4b, stnd, true, false, zero, one, comp_inv,   &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT AND n IS THE DIMENSION OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000
!
    character(len=*), parameter :: name_proc='Example 1 of comp_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ainv, res
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE IN PLACE THE INVERSE OF A REAL n-BY-n MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAY.
!
    allocate( ainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
! 
!   GENERATE A RANDOM REAL MATRIX.
!
    call random_number( ainv )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
! 
!       SAVE THE RANDOM REAL MATRIX.
!
        a(:n,:n) = ainv(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE MATRIX INVERSE WITH SUBROUTINE comp_inv.
!   INPUT ARGUMENT OVERWRITTEN.
!
    call comp_inv( ainv, failure )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM comp_inv SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of COMP_INV subroutine, failure=', failure
        write (prtunit,*)
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( a, ainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a(:n,:n)) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( allocated(a) ) then
        deallocate( ainv, a )
    else
        deallocate( ainv )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing in place the inverse of a real matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_inv
! ===========================
!
end program ex1_comp_inv

ex1_comp_lfc_rot_pc.F90

program ex1_comp_lfc_rot_pc
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF,
!   COMP_PC_EOF and COMP_LFC_ROT_PC in module Mul_Stat_Procedures for
!   rotating a Principal Component Analysis model towards low or high frequency modes.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_eof, comp_pc_eof, &
                         comp_lfc_rot_pc, norm
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, p IS THE NUMBER OF OBSERVATIONS, m THE NUMBER OF VARIABLES
! AND nrot IS THE NUMBER OF EIGENVECTORS OR PCS TO ROTATE.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=100, nrot=5
!
    character(len=*), parameter :: name_proc='Example 1 of comp_lfc_rot_pc'
!
    character, parameter :: sort = 'd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: eps, err_pc, err_rot1, err_rot2, err_rot3, err_rot4, xn
    real(stnd), dimension(m,m)         :: eigvec
    real(stnd), dimension(m,nrot)      :: factor, rot_factor
    real(stnd), dimension(nrot,nrot)   :: rot
    real(stnd), dimension(m)           :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(nrot)        :: std_rot_pc
    real(stnd), dimension(m,p)         :: x, x_std
    real(stnd), dimension(p,m)         :: pc
    real(stnd), dimension(p,nrot)      :: rot_pc
!
    integer(i4b) :: nt
!
    logical(lgl) :: first, last, cov, failure, failure2
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PERFORM A PRINCIPAL COMPONENT (E.G., EOF) ANALYSIS AND AN
!               ORTHOGONAL ROTATION OF THE PRINCIPAL COMPONENT TIME SERIES
!               TOWARDS LOW-FREQUENCY MODES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   COMPUTE EOFs FROM THE CORRELATION MATRIX.
!
    cov = false
!
    first = true
    last  = true
!
    call comp_eof( x(:m,:p), first, last, eigval(:m), eigvec(:m,:m), xn, failure,       &
                   cov=cov, sort=sort, xmean=mean(:m), xstd=std(:m), xeigvar=eigvar(:m) )
!
!   ON EXIT OF COMP_EOF WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      eigvec(:m,:m)    CONTAINS THE EIGENVECTORS STORED COLUMNWISE IN THE ORDER
!                       OF THE EIGENVALUES STORED IN eigval.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVECTORS AND THE
!                       EIGENVALUES.
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!
!   COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one . sqrt(eigval(:m)) GIVES THE STANDARD-DEVIATIONS
!   ACCOUNTED FOR BY THE PC time series.
!
    singval(:m) = sqrt( eigval(:m) )
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:m), singval(:m), pc(:p,:m),   &
                      xmean=mean(:m), xstd=std(:m)                       )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:m) CONTAINS THE (STANDARDIZED) PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!   COMPUTE THE FIRST nrot CORRESPONDING FACTORS FROM THE EIGENVECTORS AND THE SINGULAR VALUES.
!
    factor(:m,:nrot) = eigvec(:m,:nrot)*spread( singval(:nrot), dim=1, ncopies=m )
!
!   NOW ROTATE THE FIRST nrot PC TIME SERIES WITH SUBROUTINE comp_lfc_rot_pc.
!
!   SPECIFY THE SIZE OF THE LOESS SMOOTHER TO BE USED FOR THE ORTHOGONAL ROTATION.
!
    nt = 5
!
    call comp_lfc_rot_pc( pc(:p,:nrot), singval(:nrot), nt, rot_pc(:p,:nrot), rot(:nrot,:nrot), &
                           std_rot_pc(:nrot), failure2 )
!
!   NOW ROTATE THE FIRST nrot FACTORS.
!
    rot_factor(:m,:nrot) = matmul( factor(:m,:nrot), rot(:nrot,:nrot) )
!
!   RESCALE THE ORIGINAL PC TIME SERIES FOR THE TESTS.
!
    pc(:p,:m) = pc(:p,:m)*spread( singval(:m), dim=1, ncopies=p )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**(t)*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs( matmul(transpose(x_std),eigvec) - pc ) )
!
!   CHECK THAT THE VARIANCE EXPLAINED BY THE ORIGINAL AND ROTATED EOFS IS THE SAME.
!
    err_rot1 = abs( sum( std_rot_pc(:nrot)**2 ) - sum( eigval(:nrot) ) )
!
!   CHECK ORTHOGONALITY OF THE ROTATION MATRIX rot COMPUTED BY comp_filt_rot_eof
!   SUBROUTINE.
!
    err_rot2 = abs( sum( abs( matmul(transpose(rot),rot) ) ) - real(nrot,stnd) )
!
!   CHECK THAT THE NORMS OF THE ROTATED PCS ARE UNCHANGED.
!
    err_rot3 =  maxval( abs( norm( rot_pc(:p,:nrot), dim=2 ) - sqrt( real(p,stnd) ) ) )
!
!   CHECK THE COMPUTATION OF VARIANCES EXPLAINED BY THE ROTATED PCS.
!
    err_rot4 = maxval( abs( std_rot_pc(:nrot) - norm( rot_factor(:m,:nrot), dim=2 ) ) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_pc,err_rot1,err_rot2,err_rot3,err_rot4)<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_lfc_rot_pc
! ==================================
!
end program ex1_comp_lfc_rot_pc

ex1_comp_mca.F90

program ex1_comp_mca
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_MCA
!   and COMP_PC_MCA in module Mul_Stat_Procedures for computing a Maximum
!   Covariance Analyis (MCA) between two datasets.
!
!   COMP_MCA computes the covariance (or correlation) matrix between the two datasets
!   with only one pass through the data.
!
!   COMP_MCA then computes all singular values and vectors of the covariance (or
!   correlation) matrix. If you are interested only in the leading singular vectors,
!   you should use COMP_MCA2 subroutine instead of COMP_MCA.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_mca, comp_pc_mca
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! mx AND p ARE THE DIMENSIONS OF THE FIRST DATASET,
! my AND p ARE THE DIMENSIONS OF THE SECOND DATASET,
! mx, my AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, mx=20, my=10, m=min(mx,my), p=50, mm=(m*(m+1))/2
!
    character(len=*), parameter :: name_proc='Example 1 of comp_mca'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: err_xpc, err_ypc, xyn
    real(stnd), dimension(mx,my)       :: xsingvec
    real(stnd), dimension(my,m)        :: ysingvec
    real(stnd), dimension(mx,2)        :: xstat
    real(stnd), dimension(my,2)        :: ystat
    real(stnd), dimension(m)           :: xysingval, xysingvar
    real(stnd), dimension(mm)          :: pccorp_x, pccorp_y
    real(stnd), dimension(mx,m)        :: xpccor
    real(stnd), dimension(my,m)        :: ypccor
    real(stnd), dimension(p,m)         :: xpc, ypc
    real(stnd), dimension(mx,p)        :: x, x_std
    real(stnd), dimension(my,p)        :: y, y_std
!
    logical(lgl) :: first, last, failure
!
    character    :: sort = 'd'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:mx,:p) WITH mx VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   GENERATE A RANDOM OBSERVATION MATRIX y(:my,:p) WITH my VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( y )
!
!   FIRST COMPUTE LEFT AND RIGHT SINGULAR VECTORS OF THE CORRELATION MATRIX.
!
    first = true
    last  = true
!
    call comp_mca( x, y, first, last, xstat, ystat, xysingval, xsingvec, failure,    &
                   sort=sort, ysingvec=ysingvec, xysingvar=xysingvar  )
!
!   ON EXIT OF COMP_MCA WHEN last=true :
!
!   xstat CONTAINS THE FOLLOWING STATISTICS ON ALL VARIABLES FROM THE x MATRIX:
!            
!       xstat(:,1) CONTAINS THE MEAN VALUES OF THE "LEFT" DATA MATRIX x.
!       xstat(:,2) CONTAINS THE STANDARD-DEVIATIONS OF THE "LEFT" DATA MATRIX x.
!
!   ystat CONTAINS THE FOLLOWING STATISTICS ON ALL VARIABLES FROM THE y MATRIX:
!            
!       ystat(:,1) CONTAINS THE MEAN VALUES OF THE "RIGHT" DATA MATRIX y.
!       ystat(:,2) CONTAINS THE STANDARD-DEVIATIONS OF THE "RIGHT" DATA MATRIX y.
!            
!   xysingval CONTAINS THE m SINGULAR VALUES OF THE CORRELATION
!   (OR COVARIANCE) MATRIX BETWEEN THE DATA MATRICES x AND y.
!
!   xsingvec IS OVERWRITTEN WITH THE FIRST m LEFT SINGULAR VECTORS
!   OF THE CORRELATION (OR COVARIANCE) MATRIX BETWEEN x AND y.
!
!   failure = FALSE :  INDICATES SUCCESSFUL EXIT.
!   failure = TRUE  :  INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                      WERE PRESENT OR THAT MAXIMUM ACCURACY WAS NOT
!                      ACHIEVED WHEN COMPUTING THE SVD OF THE COVARIANCE
!                      (OR CORRELATION) MATRIX BETWEEN THE DATA MATRICES x AND y .
!
!   ysingvec CONTAINS THE FIRST m RIGHT SINGULAR VECTORS OF THE CORRELATION
!            (OR COVARIANCE) MATRIX BETWEEN x AND y.
!
!   xysingvar CONTAINS THE PERCENTAGES OF TOTAL SQUARED COVARIANCE ASSOCIATED 
!             WITH THE LEFT AND RIGHT SINGULAR VECTORS IN ORDER OF THE 
!             SINGULAR VALUES STORED IN xysingval.
!
!
!   NOW, COMPUTE THE LEFT SINGULAR VARIABLES FROM THE DATA AND THE LEFT SINGULAR VECTORS.
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE SINGULAR VECTORS
!   HAVE BEEN COMPUTED FROM THE CORRRELATION MATRIX.
!
    first = true
    last  = true
!
    call comp_pc_mca( x(:mx,:p), xsingvec(:mx,:m), first, last,         &
                      xpccor(:mx,:m), pccorp_x(:mm), xpc(:p,:m), xyn,   &
                      xmean=xstat(:mx,1), xstd=xstat(:mx,2)             )
!
!   ON EXIT OF COMP_PC_MCA WHEN last=true :
!
!     xpccor  CONTAINS :
!                 - THE CORRELATIONS BETWEEN THE DATA MATRIX x
!                   AND THE SINGULAR VARIABLES IF THE OPTIONAL
!                   ARGUMENTS xmean AND xstd ARE PRESENT.
!                 - THE COVARIANCES BETWEEN THE DATA MATRIX x
!                   AND THE NORMALIZED SINGULAR VARIABLES IF ONLY
!                   THE OPTIONAL ARGUMENT xmean IS PRESENT.
!
!     pccorp_x CONTAINS THE CORRELATION MATRIX COR BETWEEN THE SINGULAR VARIABLES
!              STORED IN ARGUMENT xpc. COR IS STORED IN SYMMETRIC STORAGE MODE.
!              MORE PRECISELY, THE J-TH COLUMN OF THIS MATRIX COR IS STORED IN THE 
!              ARRAY pccorp_x AS FOLLOWS:
!
!                         pccorp_x(i + (j-1)*j/2) = COR(i,j) for 1<=i<=j;
!
!     xpc      CONTAINS THE UNNORMALIZED SINGULAR VARIABLES DERIVED
!              FROM x AND xsingvec.
!
!   FINALLY, COMPUTE THE RIGHT SINGULAR VARIABLES FROM THE DATA AND THE RIGHT SINGULAR VECTORS.
!
    call comp_pc_mca( y(:my,:p), ysingvec(:my,:m), first, last,        &
                      ypccor(:my,:m), pccorp_y(:mm), ypc(:p,:m), xyn,  &
                      xmean=ystat(:my,1), xstd=ystat(:my,2)            )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**t*xsingvec - xpc
!   WHERE xsingvec ARE THE LEFT SINGULAR VECTORS, x_std THE STANDARDIZED LEFT DATA
!   AND xpc THE UNNORMALIZED LEFT SINGULAR VARIABLES.
!
    x_std(:mx,:p) = x(:mx,:p) - spread( xstat(:mx,1) ,   dim=2, ncopies=p )
    x_std(:mx,:p) = x_std(:mx,:p)*spread( one/xstat(:mx,2) , dim=2, ncopies=p ) 
!
    err_xpc = sum( abs(matmul(transpose(x_std),xsingvec)-xpc) )/sum( abs(x_std) )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION y_std**t*ysingvec - ypc
!   WHERE ysingvec ARE THE RIGHT SINGULAR VECTORS, y_std THE STANDARDIZED RIGHT DATA
!   AND ypc THE UNNORMALIZED RIGHT SINGULAR VARIABLES.
!
    y_std(:my,:p) = y(:my,:p) - spread( ystat(:my,1) ,   dim=2, ncopies=p )
    y_std(:my,:p) = y_std(:my,:p)*spread( one/ystat(:my,2) , dim=2, ncopies=p ) 
!
    err_ypc = sum( abs(matmul(transpose(y_std),ysingvec)-ypc) )/sum( abs(y_std) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_xpc, err_ypc)<=sqrt( epsilon(err_ypc) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_mca
! ===========================
!
end program ex1_comp_mca

ex1_comp_mca2.F90

program ex1_comp_mca2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_MCA2
!   and COMP_PC_MCA in module Mul_Stat_Procedures for computing a Maximum
!   Covariance Analyis (MCA) between two datasets.
!
!   COMP_MCA2 computes the covariance (or correlation) matrix between the two datasets
!   with only one pass through the data.
!
!   COMP_MCA2 then computes all singular values and only the leading singular vectors
!   of the covariance (or correlation) matrix by inverse iteration.
!
!
! LATEST REVISION : 29/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_mca2, comp_pc_mca
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! mx AND p ARE THE DIMENSIONS OF THE FIRST DATASET,
! my AND p ARE THE DIMENSIONS OF THE SECOND DATASET,
! mx, my AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
! nsvd IS THE NUMBER OF THE LEADING SINGULAR VECTORS WHICH MUST BE COMPUTED AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THESE SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, mx=20, my=10, m=min(mx,my), mxy=mx+my, p=50,   &
                               nsvd=3, maxiter=2, nsingp=(nsvd*(nsvd+1))/2
!
    character(len=*), parameter :: name_proc='Example 1 of comp_mca2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: err_xpc, err_ypc, xyn
    real(stnd), dimension(mx,my)       :: xycor
    real(stnd), dimension(mxy,nsvd)    :: xysingvec
    real(stnd), dimension(mx,2)        :: xstat
    real(stnd), dimension(my,2)        :: ystat
    real(stnd), dimension(m)           :: xysingval, xysingvar
    real(stnd), dimension(nsingp)      :: pccorp_x, pccorp_y
    real(stnd), dimension(mx,nsvd)     :: xpccor
    real(stnd), dimension(my,nsvd)     :: ypccor
    real(stnd), dimension(p,nsvd)      :: xpc, ypc
    real(stnd), dimension(mx,p)        :: x, x_std
    real(stnd), dimension(my,p)        :: y, y_std
!
    logical(lgl) :: first, last, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:mx,:p) WITH mx VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   GENERATE A RANDOM OBSERVATION MATRIX y(:my,:p) WITH my VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( y )
!
!   FIRST COMPUTE THE FIRST nsvd LEFT AND RIGHT SINGULAR VECTORS OF THE CORRELATION MATRIX.
!
    first = true
    last  = true
!
    call comp_mca2( x, y, first, last, xstat, ystat, xysingval, xycor, failure,    &
                    maxiter=maxiter, xysingvec=xysingvec, xysingvar=xysingvar  )
!
!   ON EXIT OF COMP_MCA2 WHEN last=true :
!
!   xstat CONTAINS THE FOLLOWING STATISTICS ON ALL VARIABLES FROM THE x MATRIX:
!            
!       xstat(:,1) CONTAINS THE MEAN VALUES OF THE "LEFT" DATA MATRIX x.
!       xstat(:,2) CONTAINS THE STANDARD-DEVIATIONS OF THE "LEFT" DATA MATRIX x.
!
!   ystat CONTAINS THE FOLLOWING STATISTICS ON ALL VARIABLES FROM THE y MATRIX:
!            
!       ystat(:,1) CONTAINS THE MEAN VALUES OF THE "RIGHT" DATA MATRIX y.
!       ystat(:,2) CONTAINS THE STANDARD-DEVIATIONS OF THE "RIGHT" DATA MATRIX y.
!            
!   xysingval CONTAINS THE m SINGULAR VALUES OF THE CORRELATION
!   (OR COVARIANCE) MATRIX BETWEEN THE DATA MATRICES x AND y.
!
!   WHEN OPTIONAL ARGUMENT savecor IS PRESENT AND savecor=TRUE, xycor CONTAINS
!
!       THE CORRELATION OR VARIANCE-COVARIANCE MATRIX AS CONTROLLED BY THE COV ARGUMENT.
!       IN THIS CASE xycor(i,j) CONTAINS THE CORRELATION (OR COVARIANCE) COEFFICIENT
!       BETWEEN x(i,:) AND y(j,:) ( x(:,i) AND y(:,j) IF dimvarx=2 AND
!       dimvary=2 ).
!
!   IF savecor=FALSE OR IS ABSENT, THE CORRELATION (OR COVARIANCE) MATRIX IS NOT SAVED ON EXIT.
!   IN THIS CASE, xycor DOES NOT CONTAIN USEFUL INFORMATION.
!
!   failure = FALSE :  INDICATES SUCCESSFUL EXIT.
!   failure = TRUE  :  INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                      WERE PRESENT OR THAT MAXIMUM ACCURACY WAS NOT
!                      ACHIEVED WHEN COMPUTING THE SVD OF THE COVARIANCE
!                      (OR CORRELATION) MATRIX BETWEEN THE DATA MATRICES x AND y .
!
!   xysingvec CONTAINS THE FIRST nsvd RIGHT SINGULAR VECTORS OF THE CORRELATION
!             (OR COVARIANCE) MATRIX BETWEEN x AND y IN xysingvec(1:mx,:nsvd)
!             AND THE FIRST nsvd LEFT SINGULAR VECTORS IN xysingvec(mx+1:mxy,:nsvd)
!
!   xysingvar CONTAINS THE PERCENTAGES OF TOTAL SQUARED COVARIANCE ASSOCIATED 
!             WITH THE LEFT AND RIGHT SINGULAR VECTORS IN ORDER OF THE 
!             SINGULAR VALUES STORED IN xysingval.
!
!
!   NOW, COMPUTE THE FIRST nsvd LEFT SINGULAR VARIABLES FROM THE DATA AND THE LEFT SINGULAR VECTORS.
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE SINGULAR VECTORS
!   HAVE BEEN COMPUTED FROM THE CORRRELATION MATRIX.
!
    first = true
    last  = true
!
    call comp_pc_mca( x(:mx,:p), xysingvec(:mx,:nsvd), first, last,                  &
                      xpccor(:mx,:nsvd), pccorp_x(:nsingp), xpc(:p,:nsvd), xyn,      &
                      xmean=xstat(:mx,1), xstd=xstat(:mx,2)                          )
!
!   ON EXIT OF COMP_PC_MCA WHEN last=true :
!
!     xpccor  CONTAINS :
!                 - THE CORRELATIONS BETWEEN THE DATA MATRIX x
!                   AND THE SINGULAR VARIABLES IF THE OPTIONAL
!                   ARGUMENTS xmean AND xstd ARE PRESENT.
!                 - THE COVARIANCES BETWEEN THE DATA MATRIX x
!                   AND THE NORMALIZED SINGULAR VARIABLES IF ONLY
!                   THE OPTIONAL ARGUMENT xmean IS PRESENT.
!
!     pccorp_x CONTAINS THE CORRELATION MATRIX COR BETWEEN THE SINGULAR VARIABLES
!              STORED IN ARGUMENT xpc. COR IS STORED IN SYMMETRIC STORAGE MODE.
!              MORE PRECISELY, THE J-TH COLUMN OF THIS MATRIX COR IS STORED IN THE 
!              ARRAY pccorp_x AS FOLLOWS:
!
!                         pccorp_x(i + (j-1)*j/2) = COR(i,j) for 1<=i<=j;
!
!     xpc      CONTAINS THE UNNORMALIZED SINGULAR VARIABLES DERIVED
!              FROM x AND xsingvec.
!
!   FINALLY, COMPUTE THE FIRST nsvd RIGHT SINGULAR VARIABLES FROM THE DATA AND THE RIGHT SINGULAR VECTORS.
!
    call comp_pc_mca( y(:my,:p), xysingvec(mx+1:mxy,:nsvd), first, last,             &
                      ypccor(:my,:nsvd), pccorp_y(:nsingp), ypc(:p,:nsvd), xyn,      &
                      xmean=ystat(:my,1), xstd=ystat(:my,2)                          )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**t*xsingvec - xpc
!   WHERE xsingvec ARE THE LEFT SINGULAR VECTORS, x_std THE STANDARDIZED LEFT DATA
!   AND xpc THE UNNORMALIZED LEFT SINGULAR VARIABLES.
!
    x_std(:mx,:p) = x(:mx,:p) - spread( xstat(:mx,1) , dim=2, ncopies=p )
    x_std(:mx,:p) = x_std(:mx,:p)*spread( one/xstat(:mx,2) , dim=2, ncopies=p ) 
!
    err_xpc = sum( abs(matmul(transpose(x_std),xysingvec(:mx,:nsvd))-xpc) )/sum( abs(x_std) )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION y_std**t*ysingvec - ypc
!   WHERE ysingvec ARE THE RIGHT SINGULAR VECTORS, y_std THE STANDARDIZED RIGHT DATA
!   AND ypc THE UNNORMALIZED RIGHT SINGULAR VARIABLES.
!
    y_std(:my,:p) = y(:my,:p) - spread( ystat(:my,1) , dim=2, ncopies=p )
    y_std(:my,:p) = y_std(:my,:p)*spread( one/ystat(:my,2) , dim=2, ncopies=p ) 
!
    err_ypc = sum( abs(matmul(transpose(y_std),xysingvec(mx+1:mxy,:nsvd))-ypc) )/sum( abs(y_std) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_xpc, err_ypc)<=sqrt( epsilon(err_xpc) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_mca2
! ============================
!
end program ex1_comp_mca2

ex1_comp_mvs.F90

program ex1_comp_mvs
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_MVS
!   in module Stat_Procedures for computing univariate statistics (mean, variance,
!   and standard-deviation only) from a dataset.
!   The dataset can have up to four dimensions and the last one corresponds to
!   the observations.
!
!   All the statistics are computed with only one-pass on the data in one or several steps 
!   with a very efficient algorithm for large datasets, which also allows out-of-core
!   computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, comp_mvs
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAY.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=250
!
    character(len=*), parameter :: name_proc='Example 1 of comp_mvs'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                   :: err_mean, err_std, err_var, eps
    real(stnd), dimension(n,m)   :: xmean1, xmean2, xstd1, xstd2, xvar1, xvar2
    real(stnd), dimension(n,m,p) :: x
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x .
!
    call random_number( x )
!
!   FIRST COMPUTE THE MEANS, VARIANCES AND STANDARD-DEVIATIONS OF x FOR THE p OBSERVATIONS
!   IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_mvs( x(:,:,:), first, last, xmean1(:,:), xvar1(:,:), xstd1(:,:) )
!
!   SECOND RECOMPUTE THE MEANS, VARIANCES AND STANDARD-DEVIATIONS OF x, ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_mvs( x(:,:,i:i), first, last, xmean2(:,:), xvar2(:,:), xstd2(:,:) )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_mean = maxval( abs( ( xmean1-xmean2)/xmean1 ) )
    err_var  = maxval( abs( ( xvar1-xvar2)/xvar1    ) )
    err_std  = maxval( abs( ( xstd1-xstd2)/xstd1    ) )
!
    if ( max(err_mean, err_var, err_std )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_mvs
! ===========================
!
end program ex1_comp_mvs

ex1_comp_ortho_rot_eof.F90

program ex1_comp_ortho_rot_eof
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF,
!   COMP_PC_EOF and COMP_ORTHO_ROT_EOF in module Mul_Stat_Procedures for
!   performing an orthogonal rotation of a Principal Component Analysis model.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, zero, half, one, comp_eof, comp_pc_eof, &
                         comp_ortho_rot_eof, norm
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, p IS THE NUMBER OF OBSERVATIONS, m THE NUMBER OF VARIABLES
! AND nrot IS THE NUMBER OF EIGENVECTORS TO ROTATE.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=50, nrot=5
!
    character(len=*), parameter :: name_proc='Example 1 of comp_ortho_rot_eof'
!
    character, parameter :: sort = 'd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: eps, err_pc, err_rot1, err_rot2, err_rot3, err_rot4, xn, w, delta
    real(stnd), dimension(m,m)         :: eigvec
    real(stnd), dimension(m,nrot)      :: factor, rot_factor
    real(stnd), dimension(nrot,nrot)   :: rot
    real(stnd), dimension(m)           :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(nrot)        :: std_rot_factor
    real(stnd), dimension(m,p)         :: x, x_std
    real(stnd), dimension(p,m)         :: pc
    real(stnd), dimension(p,nrot)      :: rot_pc
!
    integer(i4b) :: maxiter
!
    logical(lgl) :: first, last, cov, failure, failure2, knorm
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PERFORM A PRINCIPAL COMPONENT (E.G., EOF) ANALYSIS AND AN
!               ORTHOGONAL ROTATION OF THE EOF MODEL USING A GENERALIZED
!               ORTHOMAX CRITERION, INCLUDING QUARTIMAX AND VARIMAX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   COMPUTE EOFs FROM THE CORRELATION MATRIX.
!
    cov = false
!
    first = true
    last  = true
!
    call comp_eof( x(:m,:p), first, last, eigval(:m), eigvec(:m,:m), xn, failure,       &
                   cov=cov, sort=sort, xmean=mean(:m), xstd=std(:m), xeigvar=eigvar(:m) )
!
!   ON EXIT OF COMP_EOF WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      eigvec(:m,:m)    CONTAINS THE EIGENVECTORS STORED COLUMNWISE IN THE ORDER
!                       OF THE EIGENVALUES STORED IN eigval.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVECTORS AND THE
!                       EIGENVALUES.
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!
!   COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one . sqrt(eigval(:m)) GIVES THE STANDARD-DEVIATIONS
!   ACCOUNTED FOR BY THE PC time series.
!
    singval(:m) = sqrt( eigval(:m) )
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:m), singval(:m), pc(:p,:m),   &
                      xmean=mean(:m), xstd=std(:m)                       )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:m) CONTAINS THE PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!   COMPUTE THE FIRST nrot CORRESPONDING FACTORS FROM THE EIGENVECTORS AND THE EIGENVALUES.
!
    factor(:m,:nrot) = eigvec(:m,:nrot)*spread( singval(:nrot), dim=1, ncopies=m )
!
!   NOW ROTATE THE FIRST nrot FACTORS WITH SUBROUTINE comp_ortho_rot_eof.
!
!   USE w=0 FOR QUARTIMAX METHOD.
!
!    w = zero
!
!   USE w=1 FOR VARIMAX METHOD.
!
    w = one
!
!   USE w=nrot/2 FOR EQUAMAX METHOD.
!
!    w = real( nrot, stnd)*half
!
!   KAISER ROW NORMALIZATION IS PERFORMED IF knorm=true IS USED.
!
    knorm = true
!
!   USER-SPECIFIED CONVERGENCE CRITERION.
!
    delta = 0.0001_stnd
!
!   MAXIMUM NUMBER OF ITERATIONS FOR ROTATION.
!
    maxiter = 60_i4b
!
    call comp_ortho_rot_eof( factor(:m,:nrot), rot_factor(:m,:nrot), rot(:nrot,:nrot), &
                             std_rot_factor(:nrot), failure2, knorm=knorm, w=w,        &
                             maxiter=maxiter, delta=delta )
!
!   NOW ROTATE THE FIRST nrot PC TIME SERIES.
!
    rot_pc(:p,:nrot) = matmul( pc(:p,:nrot), rot(:nrot,:nrot) )
!
!   RESCALE THE ORIGINAL PC TIME SERIES FOR THE TESTS.
!
    pc(:p,:m) = pc(:p,:m)*spread( singval(:m), dim=1, ncopies=p )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**(t)*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs( matmul(transpose(x_std),eigvec) - pc ) )
!
!   CHECK THAT THE VARIANCE EXPLAINED BY THE ORIGINAL AND ROTATED EOFS IS THE SAME.
!
    err_rot1 = abs( sum( std_rot_factor(:nrot)**2 ) - sum( eigval(:nrot) ) )
!
!   CHECK ORTHOGONALITY OF THE ROTATION MATRIX rot COMPUTED BY comp_ortho_rot_eof
!   SUBROUTINE.
!
    err_rot2 = abs( sum( abs( matmul(transpose(rot),rot) ) ) - real( nrot, stnd ) )
!
!   CHECK THAT THE NORMS OF THE ROTATED PCs ARE UNCHANGED.
!
    err_rot3 = maxval( abs( norm( rot_pc(:p,:nrot), dim=2 ) - sqrt( real(p,stnd) ) ) )
!
!   CHECK COMPUTATION OF VARIANCES EXPLAINED BY THE ROTATED PCs.
!
    err_rot4 = maxval( abs( std_rot_factor(:nrot) -    &
               norm( rot(:nrot,:nrot)*spread(singval(:nrot),dim=2,ncopies=nrot), dim=2 ) ) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_pc,err_rot1,err_rot2,err_rot3,err_rot4)<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_ortho_rot_eof
! =====================================
!
end program ex1_comp_ortho_rot_eof

ex1_comp_smooth_rot_pc.F90

program ex1_comp_smooth_rot_pc
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_EOF,
!   COMP_PC_EOF and COMP_SMOOTH_ROT_PC in module Mul_Stat_Procedures for
!   rotating a Principal Component Analysis model towards smoothed modes
!   in the time domain.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, comp_eof, comp_pc_eof, &
                         comp_smooth_rot_pc, norm
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, p IS THE NUMBER OF OBSERVATIONS, m THE NUMBER OF VARIABLES
! AND nrot IS THE NUMBER OF EIGENVECTORS OR PCS TO ROTATE.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=100, nrot=5
!
    character(len=*), parameter :: name_proc='Example 1 of comp_smooth_rot_pc'
!
    character, parameter :: sort = 'd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                         :: eps, err_pc, err_rot1, err_rot2, err_rot3, err_rot4, xn
    real(stnd), dimension(m,m)         :: eigvec
    real(stnd), dimension(m,nrot)      :: factor, rot_factor
    real(stnd), dimension(nrot,nrot)   :: rot
    real(stnd), dimension(m)           :: mean, std, eigval, eigvar, singval
    real(stnd), dimension(nrot)        :: std_rot_pc
    real(stnd), dimension(m,p)         :: x, x_std
    real(stnd), dimension(p,m)         :: pc
    real(stnd), dimension(p,nrot)      :: rot_pc
!
    logical(lgl) :: first, last, cov, failure, failure2
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PERFORM A PRINCIPAL COMPONENT (E.G., EOF) ANALYSIS AND AN
!               ORTHOGONAL ROTATION OF THE PRINCIPAL COMPONENT TIME SERIES
!               TOWARDS SMOOTHED MODES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION MATRIX x(:m,:p) WITH m VARIABLES
!   AND p OBSERVATIONS.
!
    call random_number( x )
!
!   COMPUTE EOFs FROM THE CORRELATION MATRIX .
!
    cov = false
!
    first = true
    last  = true
!
    call comp_eof( x(:m,:p), first, last, eigval(:m), eigvec(:m,:m), xn, failure,       &
                   cov=cov, sort=sort, xmean=mean(:m), xstd=std(:m), xeigvar=eigvar(:m) )
!
!   ON EXIT OF COMP_EOF WHEN last=true :
!
!      eigval(:m)       CONTAINS THE EIGENVALUES.
!
!      eigvec(:m,:m)    CONTAINS THE EIGENVECTORS STORED COLUMNWISE IN THE ORDER
!                       OF THE EIGENVALUES STORED IN eigval.
!
!      xn               CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAY x,
!                       xn = real(p,stnd) .
!
!      failure = false  INDICATES SUCCESSFUL EXIT.
!      failure = true   INDICATES THAT FEWER THAN TWO VALID OBSERVATIONS
!                       WERE PRESENT OR THAT THE OBSERVATIONS ON SOME
!                       VARIABLE WERE CONSTANT AND THE CORRELATIONS
!                       WERE REQUESTED OR THAT MAXIMUM ACCURACY WAS NOT
!                       ACHIEVED WHEN COMPUTING THE EIGENVECTORS AND THE
!                       EIGENVALUES.
!
!      mean(:m)         CONTAINS THE MEAN VALUES OF THE m VARIABLES.
!
!      std(:m)          CONTAINS THE STANDARD-DEVIATIONS OF THE m VARIABLES.
!
!      eigvar(:m)       CONTAINS PERCENTAGES OF TOTAL VARIANCE ASSOCIATED 
!                       WITH THE EIGENVECTORS IN THE ORDER OF THE EIGENVALUES
!                       STORED IN eigval.
!
!
!   COMPUTE THE PRINCIPAL COMPONENTS FROM THE DATA AND THE EIGENVECTORS.
!
!   IN ORDER TO COMPUTE NORMALIZED PCs, SET singval(:m) = sqrt(eigval(:m)),
!   OTHERWISE SET singval(:) = one . sqrt(eigval(:m)) GIVES THE STANDARD-DEVIATIONS
!   ACCOUNTED FOR BY THE PC time series.
!
    singval(:m) = sqrt( eigval(:m) )
!
!   THE OPTIONAL PARAMETERS xmean AND xstd ARE REQUIRED IF THE EIGENVECTORS
!   HAVE BEEN COMPUTED FROM THE CORRELATION MATRIX.
!
    call comp_pc_eof( x(:m,:p), eigvec(:m,:m), singval(:m), pc(:p,:m),   &
                      xmean=mean(:m), xstd=std(:m)                       )
!
!   ON EXIT OF COMP_PC_EOF :
!
!      pc(:p,:m) CONTAINS THE (STANDARDIZED) PRINCIPAL COMPONENTS STORED COLUMNWISE.
!
!   COMPUTE THE FIRST nrot CORRESPONDING FACTORS FROM THE EIGENVECTORS AND THE SINGULAR VALUES.
!
    factor(:m,:nrot) = eigvec(:m,:nrot)*spread( singval(:nrot), dim=1, ncopies=m )
!
!   NOW ROTATE THE FIRST nrot PC TIME SERIES WITH SUBROUTINE comp_smooth_rot_pc.
!
    call comp_smooth_rot_pc( pc(:p,:nrot), singval(:nrot), rot_pc(:p,:nrot), rot(:nrot,:nrot), &
                             std_rot_pc(:nrot), failure2 )
!
!   NOW ROTATE THE FIRST nrot FACTORS.
!
    rot_factor(:m,:nrot) = matmul( factor(:m,:nrot), rot(:nrot,:nrot) )
!
!   RESCALE THE ORIGINAL PC TIME SERIES FOR THE TESTS.
!
    pc(:p,:m) = pc(:p,:m)*spread( singval(:m), dim=1, ncopies=p )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION x_std**(t)*eigvec - pc
!   WHERE eigvec ARE THE EIGENVECTORS, x_std THE STANDARDIZED DATA
!   AND pc THE UNNORMALIZED PRINCIPAL COMPONENTS.
!
    x_std(:m,:p) = x(:m,:p) - spread( mean(:m) ,      dim=2, ncopies=p )
    x_std(:m,:p) = x_std(:m,:p)*spread( one/std(:m) , dim=2, ncopies=p ) 
!
    err_pc =  sum( abs( matmul(transpose(x_std),eigvec) - pc ) )
!
!   CHECK THAT THE VARIANCE EXPLAINED BY THE ORIGINAL AND ROTATED EOFS IS THE SAME.
!
    err_rot1 = abs( sum( std_rot_pc(:nrot)**2 ) - sum( eigval(:nrot) ) )
!
!   CHECK ORTHOGONALITY OF THE ROTATION MATRIX rot COMPUTED BY comp_smooth_rot_eof
!   SUBROUTINE.
!
    err_rot2 = abs( sum( abs( matmul(transpose(rot),rot) ) ) - real(nrot,stnd) )
!
!   CHECK THAT THE NORMS OF THE ROTATED PCS ARE UNCHANGED.
!
    err_rot3 =  maxval( abs( norm( rot_pc(:p,:nrot), dim=2 ) - sqrt( real(p,stnd) ) ) )
!
!   CHECK THE COMPUTATION OF VARIANCES EXPLAINED BY THE ROTATED PCS.
!
    err_rot4 = maxval( abs( std_rot_pc(:nrot) - norm( rot_factor(:m,:nrot), dim=2 ) ) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( max(err_pc,err_rot1,err_rot2,err_rot3,err_rot4)<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_smooth_rot_pc
! =====================================
!
end program ex1_comp_smooth_rot_pc

ex1_comp_sym_ginv.F90

program ex1_comp_sym_ginv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_SYM_GINV
!   in module Lin_Procedures for computing a generalized inverse of a symmetric
!   semi-positive definite matrix. 
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    USE Statpack, ONLY : lgl, i4b, stnd, true, false, zero, one, c10, comp_sym_ginv, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SEMI-POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE SEMI-POSITIVE
! DEFINITE MATRIX. IF m IS LESS THAN n, A SEMI-POSITIVE MATRIX IS GENERATED, ON THE OTHER HAND IF
! m IS GREATER OR EQUAL TO n, A POSITIVE MATRIX IS GENERATED.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=n-10_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of comp_sym_ginv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tol, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ata, atainv, res
!
    integer(i4b) :: j, krank
    integer      :: iok, istart, iend, irate
!
    logical(lgl) :: do_test, upper, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : GENERALIZED INVERSE OF A REAL SYMMETRIC SEMI-POSITIVE DEFINITE MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS AND THE TOLERANCE TO BE USED
!   FOR THE TEST OF SINGULARITY IN comp_sym_ginv .
!
    tol = sqrt( epsilon( err ) )
    eps = fudge*tol
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), ata(n,n), atainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
! 
!   GENERATE A RANDOM DATA MATRIX a .
!
    call random_number( a )
!
!   GENERATE A n-BY-n SYMMETRIC POSITIVE SEMIDEFINITE MATRIX From a .
!
    ata = matmul( transpose(a), a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE A GENERALIZED INVERSE OF ata WITH SUBROUTINE comp_sym_ginv.
!   INPUT ARGUMENT ata IS NOT OVERWRITTEN.
!
    call comp_sym_ginv( ata, failure, krank, atainv, upper=upper, tol=tol )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM comp_sym_ginv SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to COMP_SYM_GINV subroutine, failure=', failure
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY .
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE TWO, IDENTITIES ata*atainv*ata = a AND  atainv*ata*atainv = atainv,
!       WHICH DEFINE THE GENERALIZED INVERSE OF ata.
!
        res = matmul(ata, matmul(atainv,ata)) - ata
        err = sum( abs(res) ) / sum( abs(ata) )
!
        res = matmul(atainv, matmul(ata,atainv)) - atainv
        err = max( sum( abs(res) ) / sum( abs(atainv) ), err )
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        err = sum( abs(res) ) / sum( abs(ata) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ata, atainv )
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the generalized inverse of a symmetric semi-positive definite matrix of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_sym_ginv
! ================================
!
end program ex1_comp_sym_ginv

ex1_comp_sym_inv.F90

program ex1_comp_sym_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_SYM_INV
!   in module Lin_Procedures for computing the inverse of a real symmetric
!   positive-definite matrix.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    USE Statpack, only : lgl, i4b, stnd, true, false, zero, one, comp_sym_inv, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE POSITIVE
! DEFINITE MATRIX. m SHOULD BE GREATER OR EQUAL TO n.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=4000
!
    character(len=*), parameter :: name_proc='Example 1 of comp_sym_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ata, atainv, res
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate
!
    logical(lgl) :: do_test, upper, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : INVERSE OF A REAL SYMMETRIC POSITIVE DEFINITE MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), ata(n,n), atainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
! 
!   GENERATE A RANDOM DATA MATRIX a WITH MORE ROWS THAN COLUMNS.
!
    call random_number( a )
!
!   GENERATE A n-BY-n SYMMETRIC POSITIVE DEFINITE MATRIX From a .
!
    ata = matmul( transpose(a), a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE MATRIX INVERSE OF ata WITH SUBROUTINE comp_sym_inv.
!   INPUT ARGUMENT ata IS NOT OVERWRITTEN.
!
    call comp_sym_inv( ata, failure, atainv, upper=upper )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM comp_sym_inv SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to COMP_SYM_INV subroutine, failure=', failure
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE ata TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( ata, atainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        err = sum( abs(res) ) / sum( abs(ata) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ata, atainv )
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a positive definite symmetric matrix of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_sym_inv
! ===============================
!
end program ex1_comp_sym_inv

ex1_comp_triang_inv.F90

program ex1_comp_triang_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_TRIANG_INV
!   in module Lin_Procedures for computing the inverse of a real triangular matrix.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, one, true, false, allocate_error,  &
                         triangle, norm, comp_triang_inv, merror
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE TRIANGULAR MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000, p=n*(n+1)/2
!
    character(len=*), parameter :: name_proc='Example 1 of comp_triang_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ainv, res
    real(stnd), dimension(:),   allocatable :: ap
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE INVERSE OF A REAL n-BY-n TRIANGULAR MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE MATRIX IS UPPER OR LOWER TRIANGULAR.
!
    upper = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), ainv(n,n), ap(p), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM TRIANGULAR MATRIX IN PACKED FORM ap .
!
    call random_number( ap )
!
!   MAKE SURE THAT TRIANGULAR MATRIX IS NOT SINGULAR.
!
    ap = ap + real( n, stnd )
!
!   UNPACK THE TRIANGULAR MATRIX a .
!
    a = unpack( ap, mask=triangle(upper,n,n,extra=1_i4b), field=zero )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE IN PLACE THE INVERSE OF a WITH SUBROUTINE comp_triang_inv.
!   THE INPUT ARGUMENT IS OVERWRITTEN.
!
    call comp_triang_inv( a, ainv, upper=upper )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( upper ) then
!
            do j = 1_i4b, n-1_i4b
                ainv(j+1_i4b:n,j) = zero
            end do
!
        else
!
            do j = 2_i4b, n
                ainv(1_i4b:j-1_i4b,j) = zero
            end do
!
        end if
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res(:n,:n) = matmul( a(:n,:n), ainv(:n,:n) )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a(:n,:n)) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, ainv, ap, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, ainv, ap )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a real triangular matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_comp_triang_inv
! ==================================
!
end program ex1_comp_triang_inv

ex1_comp_unistat.F90

program ex1_comp_unistat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_UNISTAT
!   in module Stat_Procedures for computing univariate statistics from a dataset.
!   The dataset can have up to four dimensions and the last one corresponds to
!   the observations.
!
!   All the statistics are computed with only one-pass on the data in one or several steps 
!   with a very efficient algorithm for large datasets, which also allows out-of-core
!   computations.
!
!
! LATEST REVISION : 14/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_unistat
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=250
!
    character(len=*), parameter :: name_proc='Example 1 of comp_unistat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                   :: err1, err2, eps
    real(stnd), dimension(n,m,7) :: xstat1, xstat2
    real(stnd), dimension(n,m,p) :: x
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x .
!
    call random_number( x )
!
!   FIRST COMPUTE THE STATISTICS OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_unistat( x(:n,:m,:p), first, last, xstat1(:n,:m,:7) )
!
!   ON EXIT, WHEN last=true, xstat1 CONTAINS THE FOLLOWING
!   STATISTICS ON ALL VARIABLES :
!
!        xstat1(:,:,1) CONTAINS THE MEAN VALUES.
!        xstat1(:,:,2) CONTAINS THE VARIANCES.
!        xstat1(:,:,3) CONTAINS THE STANDARD DEVIATIONS.
!        xstat1(:,:,4) CONTAINS THE COEFFICIENTS OF SKEWNESS.
!        xstat1(:,:,5) CONTAINS THE COEFFICIENTS OF KURTOSIS.
!        xstat1(:,:,6) CONTAINS THE MINIMA.
!        xstat1(:,:,7) CONTAINS THE MAXIMA.
!
!   SECOND COMPUTE THE STATISTICS OF x, ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_unistat( x(:n,:m,i:i), first, last, xstat2(:n,:m,:7) )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err1 = maxval( abs( (xstat2(:,:,1:3)-xstat1(:,:,1:3))/xstat1(:,:,1:3) ) )
    err2 = maxval( abs( xstat2(:,:,4:7)-xstat1(:,:,4:7) ) )
!
    if ( max(err1, err2)<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_comp_unistat
! ===============================
!
end program ex1_comp_unistat

ex1_cpusecs.F90

program ex1_cpusecs
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function CPUSECS
!   in module Time_Procedures to obtain the current value of the system
!   cpu usage clock in EXTD precision.
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, extd, cpusecs
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(extd)   :: tim1, tim2
!
    integer(i4b) :: i, j
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of cpusecs'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   FUNCTION cpusecs OBTAINS, FROM THE INTRINSIC ROUTINE SYSTEM_CLOCK,
!   THE CURRENT VALUE OF THE SYSTEM CPU USAGE CLOCK. THIS VALUE
!   IS THEN CONVERTED TO SECONDS AND RETURNED AS AN EXTENDED PRECISION
!   REAL VALUE.
!
!   THIS FUNCTIONS ASSUMES THAT THE NUMBER OF CPU CYCLES (CLOCK COUNTS) BETWEEN
!   TWO CALLS IS LESS THAN COUNT_MAX, THE MAXIMUM POSSIBLE VALUE OF CLOCK COUNTS
!   AS RETURNED BY THE INTRINSIC ROUTINE SYSTEM_CLOCK.
!
!   THIS FUNCTION WILL NOT WORK PROPERLY WITH OPENMP.
!
!   A TYPICAL USE OF THIS FUNCTION IS AS FOLLOWS :
!
    tim1 = cpusecs()
    j = 0
!
    do i=1, 2000000000
        j = j + 1
    end do
!
    tim2 = cpusecs()
!
!   PRINT THE RESULT.
!
    write (prtunit, *)  " CPU Time(s): ", tim2-tim1 ,' seconds'
!
!
! END OF PROGRAM ex1_cpusecs
! ==========================
!
end program ex1_cpusecs

ex1_cur_cmp.F90

program ex1_cur_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines CUR_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures for computing a
!   (randomized or deterministic) cur decomposition of a data matrix.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, allocate_error, &
                         cur_cmp, partial_qr_cmp, ortho_gen_qr, norm, merror, gen_random_mat,     &
                         random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX;
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX;
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX;
! ncur IS THE TARGET RANK OF THE CUR DECOMPOSITION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=5000, n=3000, nsvd0=2000, ncur=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of cur_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err4, err5, err6, tmp, norma, normr, &
                                               rnorm_row, rnorm_col, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, c, u, r, ur, resid
!
    integer(i4b)                            :: i, blk_size, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip_row, ip_col
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, test_err, random_qr
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A (RANDOMIZED OR DETERMINISTIC) CUR DECOMPOSITION
!               OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 1_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE RESULTS OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
    test_err = true
!
!   DETERMINE PARAMETERS OF THE CUR DECOMPOSITION ALGORITHM.
!
!   SET TOLERANCE FOR CHECKING THE RANK OF THE CUR APPROXIMATION IN THE SUBROUTINE.
!
    tol = eps
!
!   SPECIFY IF A RANDOMIZED OR DETERMINISTIC CUR ALGORITHM IS USED.
!
    random_qr = false
!
!   DETERMINE THE BLOCKSIZE PARAMETER IN THE RANDOMIZED CUR ALGORITHM.
!
    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER IN THE RANDOMIZED CUR ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,i), ip_row(m), ip_col(n), singval0(nsvd0), u(ncur,ncur), c(m,ncur), &
                  r(ncur,n), ur(ncur,n), diagr(ncur), beta(ncur), resid(m,i), stat=iok    )
!
    else
!
        allocate( a(m,n), ip_row(m), ip_col(n), singval0(nsvd0), u(ncur,ncur), c(m,ncur), &
                  r(ncur,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A (RANDOMIZED OR DETERMINISTIC) CUR DECOMPOSITION OF A RANDOM
!   DATA MATRIX a  WITH SUBROUTINE cur_cmp. THE RANK OF THE CUR DECOMPOSITION IS
!   DETERMINED BY THE NUMBER OF ROWS (OR COLUMNS) OF THE ARRAY ARGUMENT u,
!   ncur = size(u,1) = size(u,2) .
!
    call cur_cmp( a(:m,:n), ip_row(:m), ip_col(:n), u(:ncur,:ncur), c=c(:m,:ncur), r=r(:ncur,:n),  &
                  rnorm_row=rnorm_row, rnorm_col=rnorm_col, tol=tol, random_qr=random_qr,          &
                  blk_size=blk_size, nover=nover )
!
!   THE ROUTINE COMPUTES A (RANDOMIZED OR DETERMINISTIC) CUR DECOMPOSITION OF a AS:
!
!                       a ≈ c * u * r
!
!   WHERE c IS A m-BY-ncur MATRIX, u IS A ncur-BY-ncur SQUARED MATRIX AND r IS A ncur-BY-n MATRIX.
!   c and r ARE SELECTED, RESPECTIVELY, AS SUBSETS OF THE COLUMNS AND ROWS OF a. U IS THEN ESTIMATED
!   TO MINIMIZE THE FROBENIUS NORM OF THE ERROR OF THE RESULTING CUR DECOMPOSITION:
!
!                    || a - c * u * r ||_F = min
!
!   SUCH CUR DECOMPOSITION CAN BE COMPUTED EFFICIENTLY WITH THE HELP OF (RANDOMIZED
!   OR DETERMINISTIC) PARTIAL QR DECOMPOSITIONS WITH COLUMN PIVOTING OF a AND a'.
!
!   MORE PRECISELY, A (RANDOMIZED OR DETERMINISTIC) PARTIAL QR DECOMPOSITION OF a IS FIRST COMPUTED AS:
!
!                     a * P ≈ Q * T = Q * [ T11  T12 ]
!
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-ncur MATRIX WITH ORTHOGONAL COLUMNS,
!   T IS A ncur-BY-n UPPER OR TRAPEZOIDAL MATRIX AND T11 IS A ncur-BY-ncur UPPER TRIANGULAR MATRIX.
!
!   THIS LEADS TO AN ESTIMATE OF c AS:
!
!                     c ≈ a * P(:,:ncur) = Q * T11
!
!   WHERE c IS A m-BY-ncur MATRIX, WHICH CONSISTS OF A SUBSET OF ncur COLUMNS OF a.
!
!   IN A SECOND STEP, A (RANDOMIZED OR DETERMINISTIC) PARTIAL QR DECOMPOSITION OF a' IS COMPUTED AS:
!
!                     a' * N' ≈ K * L = K * [ L11  L12 ]
!
!   WHERE N IS A m-BY-m PERMUTATION MATRIX, K IS A n-BY-ncur MATRIX WITH ORTHOGONAL COLUMNS,
!   L IS A ncur-BY-m UPPER OR TRAPEZOIDAL MATRIX AND L11 IS A ncur-BY-ncur UPPER TRIANGULAR MATRIX.
!
!   THIS LEADS TO AN ESTIMATE OF r AS:
!
!                     r ≈ N(:ncur,:) * a = L11' * K'
!
!   WHERE r IS A ncur-BY-n MATRIX, WHICH CONSISTS OF A SUBSET OF ncur ROWS OF a.
!
!   FINALLY, THE SQUARED MATRIX u IS THEN COMPUTED AS:
!
!                 u(:ncur,:ncur) = pseudo-inv(c) * a * pseudo-inv(r)
!
!   WHERE pseudo-inv(c) AND pseudo-inv(r) ARE THE GENERALIZED INVERSES OF c AND r, SINCE
!   THIS CHOICE LEADS TO MINIMIZE || a - c * u * r ||_F ONCE c AND r ARE KNOWN.
!
!   ON EXIT OF cur_cmp:
!
!   - ip_col STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip_col(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip_col AS FOLLOWS:
!     IF ip_col(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - ip_row STORES THE PERMUTATION MATRIX N IN THE QR DECOMPOSITION OF a'.
!     IF ip_row(j)=k, THEN THE jTH ROW OF N*a WAS THE kTH ROW OF a.
!     THE MATRIX N IS REPRESENTED IN THE ARRAY ip_row AS FOLLOWS:
!     IF ip_row(j) = i THEN THE jTH row OF N IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   THUS, WE HAVE c = a(:,ip_col(:ncur)) AND r = a(ip_row(:ncur),:) .         
!
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBERS OF THE SUBMATRICES T11
!       AND L11 ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANKS
!       OF T11 AND L11, WHICH ARE DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRICES IN THE PARTIAL QR FACTORIZATIONS WITH PIVOTING OF a AND a',
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANKS OF T11 AND L11 ARE DETERMINED, E.G., CRUDE TESTS ON T(j,j)
!       AND L(j,j) ARE DONE TO DETERMINE THE NUMERICAL RANKS OF T11 AND L11.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF T11 AND L11 ARE NOT
!       PERFORMED AND THE RANKS OF T11 AND L11 ARE ASSUMED TO BE EQUAL TO ncur.
!
!   THE SUBROUTINE WILL EXIT WITH AN ERROR MESSAGE IF THE RANKS OF T11 OR L11 ARE LESS THAN ncur.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE UPPER BOUND FOR THE RELATIVE ERROR OF THE CUR APPROXIMATION.
!
    normr = sqrt( rnorm_row**2 + rnorm_col**2 )
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       COMPUTE THE EXACT RELATIVE ERROR OF THE CUR APPROXIMATION.
!
        ur(:ncur,:n) = matmul( u(:ncur,:ncur), r(:ncur,:n) )
!
        resid(:m,:n) = a(:m,:n) - matmul( c(:m,:ncur), ur(:ncur,:n) )
!
        err2 = norm( resid(:m,:n) )/norma
!
!       CHECK COMPUTATION OF MATRIX c .
!
        resid(:m,:ncur) = a(:m,ip_col(:ncur)) - c(:m,:ncur)
!
        err3 = norm( resid(:m,:ncur) )
!
!       CHECK COMPUTATION OF MATRIX r .
!
        resid(:ncur,:n) = a(ip_row(:ncur),:n) - r(:ncur,:n)
!
        err4 = norm( resid(:ncur,:n) )
!
!       RECOMPUTE PARTIAL QR DECOMPOSITION OF MATRIX a .
!
        call partial_qr_cmp( a(:m,:n), diagr(:ncur), beta(:ncur), ip_col(:n), i )
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF MATRIX a .
!
        call ortho_gen_qr( a(:m,:m), beta(:ncur) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF ncur ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(ncur)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION c(:m,:ncur) - Q(:m,:ncur)*(Q(:m,:ncur)'*c(:m,:ncur)).
!
        ur(:ncur,:ncur) = matmul( transpose(a(:m,:ncur)), c(:m,:ncur) )
!
        resid(:m,:ncur) = abs( c(:m,:ncur) - matmul( a(:m,:ncur), ur(:ncur,:ncur) ) )
!
        err5 = maxval( resid(:m,:ncur) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF c(:m,:ncur) WITH ITS ORTHOGONAL COMPLEMENT Q(:m,ncur+1:m).
!
        if ( m>ncur ) then
!
            resid(:ncur,ncur+1_i4b:m) = matmul( transpose(c(:m,:ncur)), a(:m,ncur+1_i4b:m) )
!
            err6 = maxval( abs( resid(:ncur,ncur+1_i4b:m) ) )/real(m,stnd)
!
        else
!
            err6 = zero
!
        end if
!
        test_err = err2 <= err1
!
        if ( random_qr ) then
            err = max( err3, err4 )
        else
            err = max( err3, err4, err5, err6 )
        endif
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, ur, diagr, beta )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ip_row, ip_col, singval0, c, u, r )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. test_err ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the CUR approximation              &
                       &                                       = ', ncur
!        
    write (prtunit,*) 'Upper bound of the relative error of CUR & 
                       & decomposition ||A - C*U*R||_F/||A||_F   = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Exact relative error of CUR decomposition& 
                           & ||A - C*U*R||_F/||A||_F                 = ', err2
!        
        write (prtunit,*) 'Accuracy of the range of the CUR approximation&
                           &                                    = ', err5
!
        if ( m>ncur ) then
            write (prtunit,*) 'Orthogonality of the range of the CUR approximation&
                               & and its orthogonal complement = ', err6
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a (randomized) CUR decomposition of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_cur_cmp
! ==========================
!
end program ex1_cur_cmp

ex1_day_of_week.F90

program ex1_day_of_week
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function DAY_OF_WEEK
!   in module Time_Procedures to determine the day of the week from a given date.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, days, get_date, day_of_week
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr, imon, iday, idayweek
!
    character(len=11) :: date
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of day_of_week'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A DATE.
!
    iyr  = 2023
    imon = 12
    iday = 4
!
!   DETERMINE THE DAY OF THE WEEK FROM FROM GREGORIAN YEAR (iyr),
!   MONTH (imon) AND DAY (iday).
!
    idayweek = day_of_week( iyr, imon, iday )
!
!   FUNCTION ymd_to_dayweek RETURNS THE DAY OF THE WEEK (E.G., MON, TUE,...) AS AN INTEGER 
!   INDEX (MON=1 TO SUN=7) FOR THE GIVEN YEAR, MONTH, AND DAY IN THE GREGORIAN
!   CALENDAR PROMULGATED BY GREGORY XIII ON FRIDAY, 15 OCTOBER 1582.
!
!   NOTE THAT THE GREGORIAN CALENDAR WAS ADOPTED IN OCT. 15, 1582, AND HENCE
!   THIS ALGORITHM WILL NOT WORK PROPERLY FOR DATES EARLY THAN 10-15-1582.
!
!   PRINT THE RESULT.
!
    call get_date( iyr, imon, iday, date )
!
    write (prtunit,*) 'The date ' // date // ' is a ' // days(idayweek)
!
!
! END OF PROGRAM ex1_day_of_week
! ==============================
!
end program ex1_day_of_week

ex1_daynum_to_dayweek.F90

program ex1_daynum_to_dayweek
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function DAYNUM_TO_DAYWEEK
!   in module Time_Procedures for determining the day of the week from a Julian day.
!
!   See also program ex1_ymd_to_daynum.f90 and W. Kahan webpage
!   (https://people.eecs.berkeley.edu/~wkahan/) for more information.               
!
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, ymd_to_daynum, daynum_to_dayweek, get_date, days
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr, imon, iday, idaynum, idayweek
!
    character(len=11) :: date
!
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of daynum_to_dayweek'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A DATE.
!
!    iyr  = 1582
!    imon = 10
!    iday = 15
!
    iyr  = 2023
    imon = 12
    iday = 4
!
!   CONVERTS GREGORIAN YEAR (iyr), MONTH (imon) AND DAY (iday) TO JULIAN DAY
!   NUMBER (idaynum).
!
    idaynum = ymd_to_daynum( iyr, imon, iday )
!
!   DETERMINE THE DAY OF THE WEEK FROM JULIAN DAY NUMBER (idaynum).
!   MONTH (imon) AND DAY (iday).
!
    idayweek = daynum_to_dayweek( idaynum )
!
!   FUNCTION daynum_to_dayweek RETURNS THE DAY OF THE WEEK (E.G., MON, TUE,...) AS AN
!   INTEGER INDEX (MON=1 TO SUN=7) FOR THE GIVEN JULIAN DAY NUMBER idaynum STARTING WITH
!   idaynum=1 ON FRIDAY, 15 OCTOBER 1582.
!
!   PRINT THE RESULT.
!
    call get_date( iyr, imon, iday, date )
!
    write (prtunit,*) 'The date ' // date // ' is a ' // days(idayweek)
!
!
! END OF PROGRAM ex1_daynum_to_dayweek
! ====================================
!
end program ex1_daynum_to_dayweek

ex1_daynum_to_ymd.F90

program ex1_daynum_to_ymd
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function YMD_TO_DAYNUM and
!   subroutine DAYNUM_TO_YMD in module Time_Procedures for computing the Julian
!   day from a given date and vice-versa.
!
!   See also program ex1_ymd_to_daynum.f90 and W. Kahan webpage
!   (https://people.eecs.berkeley.edu/~wkahan/) for more information.               
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, ymd_to_daynum, daynum_to_ymd
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr, imon, iday, iyr2, imon2, iday2, idaynum
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of daynum_to_ymd'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A DATE.
!
    iyr  = 2023
    imon = 12
    iday = 4
!
!   CONVERTS GREGORIAN YEAR (iyr), MONTH (imon) AND DAY (iday) TO JULIAN DAY
!   NUMBER (idaynum).
!
    idaynum = ymd_to_daynum( iyr, imon, iday )
!
!   FUNCTION ymd_to_daynum CONVERTS THE THREE INTEGERS iyr, imon AND iday STANDING FOR
!   YEAR, MONTH, DAY IN THE GREGORIAN CALENDAR PROMULGATED BY GREGORY XIII ON
!   FRIDAY, 15 OCTOBER 1582, IN THE CORRESPONDING JULIAN DAY NUMBER STARTING
!   WITH ymd_to_daynum=1 ON FRIDAY, 15 OCTOBER 1582.
!
!   NOTE THE GREGORIAN CALENDAR WAS ADOPTED IN OCT. 15, 1582, AND HENCE
!   THIS FUNCTION WILL NOT WORK PROPERLY FOR DATES EARLY THAN 10-15-1582.
!   
!   CONVERTS A JULIAN DAY NUMBER (idaynum) TO GREGORIAN YEAR (iyr2), MONTH (imon2)
!   AND DAY (iday2).
!
    call daynum_to_ymd( idaynum, iyr2, imon2, iday2 )
!
!   SUBROUTINE daynum_to_ymd CONVERTS THE INTEGER idaynum TO THREE INTEGERS iyr2, imon2 AND
!   iday2 STANDING FOR YEAR, MONTH, DAY IN THE GREGORIAN CALENDAR PROMULGATED BY 
!   GREGORY XIII STARTING WITH idaynum=1 ON FRIDAY, 15 OCTOBER 1582.
!
!   TO KEEP POPE GREGORY'S CALENDAR SYNCHRONIZED WITH THE SEASONS FOR THE NEXT 
!   16000 YEARS OR SO, A SMALL CORRECTION HAS BEEN INTRODUCED; MILLENNIAL YEARS
!   DIVISIBLE BY 4000 ARE NOT CONSIDERED LEAP-YEARS (SEE W. KAHAN WEBPAGE AT FOR A
!   https://people.eecs.berkeley.edu/~wkahan/ FOR A DISCUSSION OF THIS CORRECTION).
!
!
!   NOTE THE GREGORIAN CALENDAR WAS ADOPTED IN OCT. 15, 1582, AND HENCE
!   THIS SUBROUTINE WILL NOT WORK PROPERLY FOR DATES EARLY THAN 10-15-1582,
!   E.G., IF idaynum < 1.
!
!   CHECK THE COMPUTATIONS.
!
    if ( iyr==iyr2 .and. imon==imon2 .and. iday==iday2  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_daynum_to_ymd
! ================================
!
end program ex1_daynum_to_ymd

ex1_det.F90

program ex1_det
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function DET
!   in module Lin_Procedures for computing the determinant of a square matrix.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, true, false, zero, one, det, inv, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SQUARE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=500
!
    character(len=*), parameter :: name_proc='Example 1 of det'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: adet, ainvdet, err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ainv
!
    integer :: iok, istart, iend, irate
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE DETERMINANT OF A REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE DETERMINANT OF THE DATA MATRIX WITH FUNCTION det.
!
    adet = det( a )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( ainv(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE MATRIX INVERSE OF a .
!
        ainv = inv( a ) 
!
!       COMPUTE THE DETERMINANT OF MATRIX INVERSE.
!
        ainvdet = det( ainv )
!
!       CHECK det(a**-1)*det(a)**-1 = 1.
!
        err = abs(adet*ainvdet - one) / max( abs(adet), abs(ainvdet) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( ainv )
!
    end if
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( a )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the determinant of a real matrix of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_det
! ======================
!
end program ex1_det

ex1_do_index.F90

program ex1_do_index
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines DO_INDEX and REORDER
!   in module Sort_Procedures for sorting integer and real sequences by means of an index.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, arth, do_index, reorder
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE ORDER OF THE SEQUENCES.
!
    integer(i4b), parameter :: prtunit=6, n=100
!
    character(len=*), parameter :: name_proc='Example 1 of do_index'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n) :: x
!
    integer(i4b)               :: i, j, k, i1, i2
    integer(i4b), dimension(n) :: y, indexx, indexy
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE RANDOM REAL DATA TO SORT.
!
    call random_number( x )
!
!   INITIALIZE PERMUTATION TO THE IDENTITY.
!
    y = arth( 1_i4b, 1_i4b, n ) 
!    
!   GENERATE A RANDOM ORDERING OF THE INTEGERS 1 TO n.
!   STARTING AT THE END, SWAP THE CURRENT LAST INDICATOR WITH ONE
!   RANDOMLY CHOSEN FROM THOSE PRECEEDING IT.

    do i = n, 2, -1
        j = 1 + i * x(i)
        if (j < i) then
            k    = y(i)
            y(i) = y(j)
            y(j) = k
        end if
    end do
!
!   COMPUTE INDEX FOR EACH ARRAY.
!
    call do_index( x, indexx )
    call do_index( y, indexy )
!
!   EXAMPLE 1 : SORT THE REAL DATA IN ASCENDING ORDER 
!               BY MEANS OF THE INDEX .
!
    call reorder( indexx, x  )
!
!   CHECK THAT THE SORTED ARRAY IS NON-DECREASING.
!
    i1 = count( x(1:n-1) > x(2:n) )
!
!   EXAMPLE 2 : SORT THE INTEGER DATA IN DECREASING ORDER
!               BY MEANS OF THE INDEX .
!
    call reorder( indexy, y, ascending=false )
!
!   CHECK THAT THE SORTED ARRAY IS NON-INCREASING.
!
    i2 = count( y(1:n-1) < y(2:n) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( i1==0 .and. i2==0 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_do_index
! ===========================
!
end program ex1_do_index

ex1_drawsample.F90

program ex1_drawsample
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and DRAWSAMPLE
!   in modules Mul_Stat_Procedures and Random, respectively, for drawing a random sample
!   from a finite population and performing a permutation test of a correlation coefficient.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines RANDOM_SEED_, RANDOM_NUMBER_ in module Random
!    for generating random uniform numbers.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use statpack, only : i4b, stnd, lgl, true, comp_cor, drawsample, random_seed_, random_number_
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! p       IS THE NUMBER OF OBSERVATIONS OF THE RANDOM VECTORS;
! CORRELATION IS COMPUTED USING THE p1 TO p2 OBSERVATIONS (E.G. p1<p2<=p);
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PERMUTATION TEST.
!
    integer(i4b), parameter :: prtunit=6, p=50, p1=26, p2=p, p3=p2-p1+1, nrep=999, nsample=2
!
    character(len=*), parameter :: name_proc='Example 1 of drawsample'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                        :: xyn
    REAL(stnd), dimension(nsample)    :: xycor, prob, xycor2
    real(stnd), dimension(nsample,2)  :: xstat
    real(stnd), dimension(2)          :: ystat
    real(stnd), dimension(nsample,p)  :: x
    real(stnd), dimension(nsample,p3) :: x2
    real(stnd), dimension(p)          :: y
    real(stnd), dimension(p3)         :: y2
!
    integer(i4b)                     :: i
    integer(i4b), dimension(p)       :: pop
    integer(i4b), dimension(nsample) :: nge
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   INITIALIZE THE RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM OBSERVATION VECTOR y .
!
    call random_number_( y(:p) )
!
!   GENERATE A RANDOM UNIFORM OBSERVATION ARRAY x .
!
    call random_number_( x(:nsample,:p) )
!
!   COMPUTE THE CORRELATIONS BETWEEN x AND y
!   USING ONLY THE p2-p1+1 LAST  OBSERVATIONS FOR THE TWO ARRAYS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor( x(:nsample,p1:p2), y(p1:p2), first, last, xstat(:nsample,:2), ystat(:2),    &
                   xycor(:nsample), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(:nsample,1)     CONTAINS THE MEAN VALUES OF THE OBSERVATION ARRAY x(:nsample,p1:p2).
!
!      xstat(:nsample,2)     CONTAINS THE VARIANCES OF THE OBSERVATION ARRAY x(:nsample,p1:p2).
!
!      ystat(1)              CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(p1:p2).
!
!      ystat(2)              CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(p1:p2).
!
!      xycor(:nsample)       CONTAINS THE CORRELATION COEFFICIENTS BETWEEN x(:nsample,p1:p2) AND y(p1:p2).
!
!      xyn                   CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAYS
!                            x(:nsample,p1:p2) AND  y(p1:p2) (xyn=real(p2-p1+1,stnd) ).
!
!
!   NOW PERFORM A PERMUTATION TEST OF THE CORRELATION BETWEEN x AND y WITH
!   SUBROUTINES drawsample  AND comp_cor WITH nrep SHUFFLES.
!
    nge(:nsample) = 1
!
    do i=1, nrep
!
        call drawsample( p3, pop )
!
        x2(:nsample,:p3) = x(:nsample,pop(:p3))
        y2(:p3)          = y(pop(:p3))
!
        call comp_cor( x2(:nsample,:p3), y2(:p3), first, last, xstat(:nsample,:2), ystat(:2),    &
                       xycor2(:nsample), xyn )
!
        where( abs( xycor2(:nsample) )>= abs( xycor(:nsample) ) ) nge(:nsample) = nge(:nsample) + 1
!
    end do 
!
!   COMPUTE THE SIGNIFICANCE LEVELS.
!
    prob(:nsample) = real( nge(:nsample), stnd )/real( nrep+1, stnd )
!
    write (prtunit,*) 'Correlations  = ', xycor(:nsample)
    write (prtunit,*) 'Probabilities = ', prob(:nsample)
!
!
! END OF PROGRAM ex1_drawsample
! =============================
!
end program ex1_drawsample

ex1_eig_cmp.F90

program ex1_eig_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   tridiagonal reduction and tridiagonal QR algorithms are used.
!                                                                            
!                                                                            
! LATEST REVISION : 16/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6, &
                         eig_cmp, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of eig_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, tmp, tmp2, ulp, anorm, &
                                               elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl)   :: do_test, failure
!   
    character    :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE TRIDIAGONAL QR IMPLICIT SHIFT METHOD.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            d(:neig0-1_i4b) = one
            d(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                d(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( d(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( d ) ) then
!
            if ( .not.all( ieee_is_normal( d(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( d(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( d(:neig0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
    call eig_cmp( a, d, failure, sort=sort )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a.
!
!   ON EXIT OF eig_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,dim=1,ncopies=n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eig_cmp
! ==========================
!
end program ex1_eig_cmp

ex1_eig_cmp2.F90

program ex1_eig_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP2
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   tridiagonal reduction and tridiagonal QR algorithms are used. A perfect shift strategy
!   and a wave-front algorithm for applying Givens rotations to eigenvectors are used in the
!   tridiagonal QR algorithm. With these changes, EIG_CMP2 is usually much faster than
!   subroutine EIG_CMP for computing an EVD of a real symmetric matrix for large matrices.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6, &
                         eig_cmp2, norm, unit_matrix, random_seed_, random_number_,      &
                         gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of eig_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, tmp, tmp2, ulp, &
                                               anorm, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: do_test, failure
!   
    character :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE TRIDIAGONAL QR METHOD, A PERFECT SHIFT
!               STRATEGY FOR THE EIGENVECTORS AND A WAVE-FRONT ALGORITHM
!               FOR APPLYING GIVENS ROTATIONS IN THE TRIDIAGONAL QR
!               ALGORITHM.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            d(:neig0-1_i4b) = one
            d(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                d(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( d(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( d ) ) then
!
            if ( .not.all( ieee_is_normal( d(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( d(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( d(:neig0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp2.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
    call eig_cmp2( a, d, failure, sort=sort )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a.
!
!   ON EXIT OF eig_cmp2:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,dim=1,ncopies=n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eig_cmp2
! ===========================
!
end program ex1_eig_cmp2

ex1_eig_cmp3.F90

program ex1_eig_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP3
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   tridiagonal reduction and tridiagonal QR algorithms are used.
!   A wave-front algorithm for applying Givens rotations to eigenvectors is used in the
!   tridiagonal QR algorithm. With this change, EIG_CMP3 is usually much faster than
!   subroutine EIG_CMP for computing an EVD of a real symmetric matrix for large matrices.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6, &
                         eig_cmp3, norm, unit_matrix, random_seed_, random_number_,      &
                         gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of eig_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, tmp, tmp2, ulp, &
                                               anorm, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl)   :: do_test, failure
!   
    character    :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE TRIDIAGONAL QR METHOD AND A WAVE-FRONT
!               ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE TRIDIAGONAL
!               QR ALGORITHM.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            d(:neig0-1_i4b) = one
            d(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                d(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( d(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( d ) ) then
!
            if ( .not.all( ieee_is_normal( d(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( d(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( d(:neig0) )
!
    end if
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp3.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
    call eig_cmp3( a, d, failure, sort=sort )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a.
!
!   ON EXIT OF eig_cmp3:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,dim=1,ncopies=n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp3() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eig_cmp3
! ===========================
!
end program ex1_eig_cmp3

ex1_eigval_cmp.F90

program ex1_eigval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the fast
!   Pal-Walker-Kahan variant of the QR method with implicit shift is used for
!   computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, trid_inviter,  &
                         eigval_cmp, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=3000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of eigval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, res
    real(stnd), dimension(:,:), allocatable :: a, a2, eigvec, d_e
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX USING THE FAST PAL-WALKER-KAHAN VARIANT OF THE QR METHOD
!               FOR EIGENVALUES AND THE INVERSE ITERATION TECHNIQUE FOR SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp( a, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!         THE OPTIONAL ARGUMENT d_e IS SPECIFIED.
!         THE MATRIX Q IS STORED IN FACTORED FORM IN THE UPPER TRIANGLE OF a AND THE STRICTLY
!         LOWER TRIANGLE OF a IS NOT REFERENCED
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           mat=a, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:neig) = matmul( a2(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( a(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( a2(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test .and. neig>0 ) then
!
        deallocate( a, d_e, d, a2, res )
!
    else
!
        deallocate( a, d_e, d )
!
    end if
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eigval_cmp
! =============================
!
end program ex1_eigval_cmp

ex1_eigval_cmp2.F90

program ex1_eigval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP2
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the fast
!   Pal-Walker-Kahan variant of the QR method with implicit shift is used for
!   computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 27/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, trid_inviter,  &
                         eigval_cmp2, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSIONS OF THE GENERATED MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of eigval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, res
    real(stnd), dimension(:,:), allocatable :: a, a2, eigvec, d_e
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX USING THE FAST PAL-WALKER-KAHAN VARIANT OF THE QR METHOD
!               FOR EIGENVALUES AND THE INVERSE ITERATION TECHNIQUE FOR SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp2( a, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp2:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!         THE OPTIONAL ARGUMENT d_e IS SPECIFIED.
!         THE MATRIX Q IS STORED IN FACTORED FORM IN THE UPPER TRIANGLE OF a AND THE STRICTLY
!         LOWER TRIANGLE OF a IS NOT REFERENCED
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           mat=a, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:neig) = matmul( a2(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( a(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( a2(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test .and. neig>0 ) then
!
        deallocate( a, d_e, d, a2, res )
!
    else
!
        deallocate( a, d_e, d )
!
    end if
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp2()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eigval_cmp2
! ==============================
!
end program ex1_eigval_cmp2

ex1_eigval_cmp3.F90

program ex1_eigval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP3
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the
!   QR method with implicit shift is used for computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 27/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, trid_inviter,  &
                         eigval_cmp3, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSIONS OF THE GENERATED MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of eigval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, res
    real(stnd), dimension(:,:), allocatable :: a, a2, eigvec, d_e
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX USING THE QR METHOD FOR EIGENVALUES AND 
!               THE INVERSE ITERATION TECHNIQUE FOR SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp3( a, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp3:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!         THE OPTIONAL ARGUMENT d_e IS SPECIFIED.
!         THE MATRIX Q IS STORED IN FACTORED FORM IN THE UPPER TRIANGLE OF a AND THE STRICTLY
!         LOWER TRIANGLE OF a IS NOT REFERENCED
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           mat=a, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:neig) = matmul( a2(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( a(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( a2(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test .and. neig>0 ) then
!
        deallocate( a, d_e, d, a2, res )
!
    else
!
        deallocate( a, d_e, d )
!
    end if
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp3()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eigval_cmp3
! ==============================
!
end program ex1_eigval_cmp3

ex1_eigvalues.F90

program ex1_eigvalues
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function EIGVALUES 
!   in module Eig_Procedures for computing eigenvalues of a real symmetric matrix.
!
!
! LATEST REVISION : 22/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, svd_cmp, eigvalues, eigval_sort
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSIONS OF THE SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=1000
!
    character(len=*), parameter :: name_proc='Example 1 of eigvalues'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)  :: elapsed_time
    real(stnd)  :: a(n,n), a2(n,n), d(n), s(n)
!
    integer :: istart, iend, irate, imax, itime
!
    logical(lgl) :: failure
!   
    character :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX.
!
    call random_number( a2 )
!
    a = a2 + transpose( a2 )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE EIGENVALUES OF THE SELF-ADJOINT MATRIX.
!
    d = eigvalues( a )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   SORT ABSOLUTE VALUES OF EIGENVALUES.
!
    d = abs( d )
!
    call eigval_sort( sort, d )
!
!   FOR COMPARISON, COMPUTE THE SINGULAR VALUES OF THE SYMMETRIC MATRIX.
!
    call svd_cmp( a, s, failure, sort=sort )
!
!   CHECK THE RESULTS: MAGNITUDE OF EIGENVALUES SHOULD EQUAL THE SINGULAR VALUES.
!
    if ( sum(abs(d-s))<=sqrt(epsilon(s))*maxval(abs(s)) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all the eigenvalues of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_eigvalues
! ============================
!
end program ex1_eigvalues

ex1_elapsed_time.F90

program ex1_elapsed_time
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function ELAPSED_TIME
!   in module Time_Procedures for computing the elapsed time between two times.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, elapsed_time
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!
    character(len=*), parameter :: name_proc='Example 1 of elapsed_time'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer,      dimension(7) :: t1, t0
    integer(i4b)               :: i, j
!
    character(len=13) :: string
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   FUNCTION elapsed_time COMPUTES ELAPSED TIME BETWEEN TWO INVOCATIONS OF THE INTRINSIC
!   FUNCTION DATE_AND_TIME. elapsed_time( T1, T0 ) RETURNS THE TIME IN SECONDS THAT HAS
!   ELAPSED BETWEEN THE VECTORS T0 AND T1. EACH VECTOR MUST HAVE AT LEAST SEVEN ELEMENTS
!   IN THE FORMAT RETURNED BY DATE_AND_TIME FOR THE OPTIONAL ARGUMENT VALUES; NAMELY
!
!           T = (/ year, month, day, x, hour, minute, second /)
!
!   THIS FUNCTION WORKS ACROSS MONTH AND YEAR BOUNDARIES BUT DOES NOT CHECK
!   THE VALIDITY OF ITS ARGUMENTS, WHICH ARE EXPECTED TO BE OBTAINED AS IN
!   THE FOLLOWING EXAMPLE THAT SHOWS HOW TO TIME SOME OPERATION BY USING ELAPSED_TIME.
!
!   THIS ROUTINE WORKS ALSO PROPERLY WITH OPENMP.
!
    call date_and_time( values=t0(:) )
    j = 0
    do i=1, 2000000000
        j = j + 1
    end do
    call date_and_time( values=t1(:) )
!
!   PRINT THE RESULT.
!
    write (prtunit, *)  'Elapsed Time (s): ', elapsed_time( t1(:), t0(:) )
!
!
! END OF PROGRAM ex1_elapsed_time
! ===============================
!
end program ex1_elapsed_time

ex1_fastgivens_mat_left.F90

program ex1_fastgivens_mat_left
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine FASTGIVENS_MAT_LEFT 
!   in module Giv_Procedures and subroutine triang_solve in module Lin_Procedures for
!   solving a full rank linear least squares problem by a QR factorization. The QR
!   factorization is computed by the application of a series of fast Givens rotations
!   on the left of the coefficient matrix of the problem.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, false, zero, one, c50, allocate_error, merror,  &
                         fastgivens_mat_left, triang_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX AND
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=1000, np1=n+1
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of fastgivens_mat_left'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, res, d
    real(stnd), allocatable, dimension(:,:) :: a, syst
!
    integer :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - a*x ||_2
!
!               USING FAST GIVENS PLANE ROTATIONS. a IS A m-BY-n MATRIX WHICH IS ASSUMED OF FULL RANK.
!               THE RIGHT HAND SIDE b IS A m-ELEMENTS VECTOR AND THE SOLUTION VECTOR x IS A n-ELEMENTS
!               VECTOR.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), syst(m,np1), d(m), b(m), res(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   MAKE A COPY OF COEFFICIENT MATRIX a(:m,:n) .
!
    syst(:m,:n) = a(:m,:n)
!
!   MAKE A COPY OF RIGHT HAND-SIDE VECTOR b(:m) .
!
    syst(:m,np1) = b(:m)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                  a(:m,:n)*x(:n) ≈ b(:m)
!
!   FIRST TRANSFORM THE MATRIX a TO UPPER TRAPEZOIDAL FORM BY APPLYING 
!   A SERIES OF FAST GIVENS PLANE ROTATIONS ON THE ROWS OF a FROM THE LEFT
!   AND APPLY THE ROTATIONS TO b .
!
    d(:m) = one
!
    call fastgivens_mat_left( syst(:m,:np1), d(:m) )
!    
!   SOLVE THE n-BY-n UPPER TRIANGULAR SYSTEM.
!    
    call triang_solve( syst(:n,:n), syst(:n,np1), upper=true, trans=false ) 
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF a .
!
    res(:m) = b(:m) - matmul( a(:m,:n), syst(:n,np1) )
    err = sum(abs(matmul(res(:m)  ,a(:m,:n))) )/ sum( abs(a(:m,:n)) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, syst, d, b, res )
!
!
! END OF PROGRAM ex1_fastgivens_mat_left
! ======================================
!
end program ex1_fastgivens_mat_left

ex1_fastgivens_mat_right.F90

program ex1_fastgivens_mat_right
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine FASTGIVENS_MAT_RIGHT
!   in module Giv_Procedures and subroutine triang_solve in module Lin_Procedures for
!   solving a full rank linear least squares problem by a LQ factorization. The LQ
!   factorization is computed by the application of a series of fast Givens rotations
!   on the right of the coefficient matrix of the problem. 
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, false, zero, one, c50, allocate_error, merror,  &
                         fastgivens_mat_right, triang_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! n IS THE NUMBER OF ROWS OF THE RANDOM MATRIX AND
! m IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=1000, np1=n+1, m=5000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of fastgivens_mat_right'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, res, d
    real(stnd), allocatable, dimension(:,:) :: a, syst
!
    integer :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A n-BY-m REAL COEFFICIENT
!               MATRIX USING A LQ DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - x*a ||_2
!
!               USING FAST GIVENS PLANE ROTATIONS APPLY FROM THE RIGHT. a IS A n-BY-m MATRIX WHICH IS ASSUMED
!               OF FULL RANK. THE RIGHT HAND SIDE b IS A m-ELEMENTS VECTOR AND THE SOLUTION VECTOR x IS A
!               n-ELEMENTS VECTOR.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), syst(np1,m), d(m), b(m), res(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:n,:m) .
!
    call random_number( a(:n,:m) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   MAKE A COPY OF COEFFICIENT MATRIX a(:n,:m) .
!
    syst(:n,:m) = a(:n,:m)
!
!   MAKE A COPY OF RIGHT HAND-SIDE VECTOR b(:m) .
!
    syst(np1,:m) = b(:m)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                  x(:n)*a(:n,:m) ≈ b(:m)
!
!   FIRST TRANSFORM THE MATRIX a TO LOWER TRAPEZOIDAL FORM BY APPLYING 
!   A SERIES OF FAST GIVENS PLANE ROTATIONS ON THE COLUMNS OF a FROM THE RIGHT
!   AND APPLY THE ROTATIONS TO b .
!
    d(:m) = one
!
    call fastgivens_mat_right( syst(:np1,:m), d(:m) )
!    
!   SOLVE THE n-BY-n LOWER TRIANGULAR SYSTEM.
!    
    call triang_solve( syst(:n,:n), syst(np1,:n), upper=false, trans=true ) 
!    
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF a' .
!
    res(:m) = b(:m) - matmul( syst(np1,:n), a(:n,:m) )
    err = sum(abs(matmul(a(:n,:m),res(:m))) )/ sum( abs(a(:n,:m)) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       n, ' by ', m,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, syst, d, b, res )
!
!
! END OF PROGRAM ex1_fastgivens_mat_right
! =======================================
!
end program ex1_fastgivens_mat_right

ex1_fft.F90

program ex1_fft
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine FFT
!   in module FFT_Procedures for computing the Fast Fourier Transform (FFT)
!   of a complex sequence.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, true, false, init_fft, fft, end_fft,  &
                              merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE COMPLEX SEQUENCE.
!
    integer(i4b), parameter :: prtunit=6, n=1000024
!
    character(len=*), parameter :: name_proc='Example 1 of fft'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    real(stnd)                            :: err, eps, elapsed_time
    real(stnd), dimension(:), allocatable :: y
!
    complex(stnd), dimension(:), allocatable :: a, c
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FORWARD AND BACKWARD FFTS OF A COMPLEX SEQUENCE.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( y(2_i4b*n), a(n), c(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM NUMBER COMPLEX SEQUENCE.
!
    call random_number(y)
!
    a(:) = cmplx( y(1_i4b:n), y(n+1_i4b:2_i4b*n), kind=stnd)
!
!   SAVE THE COMPLEX SEQUENCE.
!
    c(:) = a(:)
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TRANSFORM AND THEN INVERT THE SEQUENCE BACK.
!
    call fft( a(:), forward=true  )
    call fft( a(:), forward=false )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    call end_fft()
!
!   CHECK THAT INVERSE(TRANSFORM(SEQUENCE)) = SEQUENCE.
!
    err = maxval(abs(c(:)-a(:)))/maxval(abs(c(:)))
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( y, a, c )
!
    if ( err<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i10,a,0pd12.4,a)')    &
      'The elapsed time for computing the forward and backward FFTs of a complex sequence of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_fft
! ======================
!
end program ex1_fft

ex1_fft_row.F90

program ex1_fft_row
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine FFT_ROW
!   in module FFT_Procedures for computing the Fast Fourier Transform (FFT)
!   of a complex sequence.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
  use Statpack, only : i4b, stnd, one, true, false, init_fft, fft_row, end_fft, &
                       merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE COMPLEX SEQUENCE.
!
    integer(i4b), parameter :: prtunit=6,  n=1000024
!
    character(len=*), parameter :: name_proc='Example 1 of fft_row'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer :: iok, istart, iend, irate, imax, itime
!
    real(stnd)                            :: err, eps, elapsed_time
    real(stnd), dimension(:), allocatable :: y
!
    complex(stnd), dimension(:), allocatable :: a, c
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FORWARD AND BACKWARD FFTS OF A COMPLEX SEQUENCE.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( y(2_i4b*n), a(n), c(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM NUMBER COMPLEX SEQUENCE.
!
    call random_number(y)
!
    a(:) = cmplx( y(1:n), y(n+1_i4b:2_i4b*n), kind=stnd)
!
!   SAVE THE COMPLEX SEQUENCE.
!
    c(:) = a(:)
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TRANSFORM AND THEN INVERT THE SEQUENCE BACK.
!
    call fft_row( a(:), forward=true  )
    call fft_row( a(:), forward=false )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    call end_fft()
!
!   CHECK THAT INVERSE(TRANSFORM(SEQUENCE)) = SEQUENCE.
!
    err = maxval(abs(c(:)-a(:)))/maxval(abs(c(:)))
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( y, a, c )
!
    if ( err<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i10,a,0pd12.4,a)')    &
      'The elapsed time for computing the forward and backward FFTs of a complex sequence of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_fft_row
! ==========================
!
end program ex1_fft_row

ex1_fftxy.F90

program ex1_fftxy
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine FFTXY
!   in module FFT_Procedures for computing the Fast Fourier Transforms (FFT)
!   of two real sequences by the FFT of a complex sequence.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, true, false, init_fft, fft, fftxy, &
                         end_fft, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE REAL AND COMPLEX SEQUENCES.
!
    integer(i4b), parameter :: prtunit=6, n=1000024
!
    character(len=*), parameter :: name_proc='Example 1 of fftxy'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer :: iok, istart, iend, irate, imax, itime
!
    real(stnd)                            :: errx, erry, eps, elapsed_time
    real(stnd), dimension(:), allocatable :: x, y, x2, y2
!
    complex(stnd), dimension(:), allocatable :: fftx, ffty
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TESTS.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FORWARD FFTS OF TWO REAL SEQUENCES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( errx ) )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( x(n), y(n), x2(n), y2(n), fftx(n), ffty(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE TWO RANDOM NUMBER REAL SEQUENCES.
!
    call random_number( x(:n) )
    call random_number( y(:n) )
!
!   SAVE THE REAL SEQUENCES.
!
    x2(:n) = x(:n)
    y2(:n) = y(:n)
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TRANSFORM AND THEN INVERT THE TWO REAL SEQUENCES BACK.
!
    call fftxy( x(:n), y(:n), fftx(:n), ffty(:n)  )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    call fft( fftx(:n), forward=false  )
    x(:n) = real( fftx(:n) )
!
    call fft( ffty(:n), forward=false )
    y(:n) = real( ffty(:n) )
!
    call end_fft()
!
!   CHECK THAT INVERSE(TRANSFORM(SEQUENCE)) = SEQUENCE.
!
    errx = maxval(abs(x2(:n)-x(:n)))/maxval(abs(x2(:n)))
    erry = maxval(abs(y2(:n)-y(:n)))/maxval(abs(y2(:n)))
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( x, y, x2, y2, fftx, ffty )
!
!   PRINT RESULT OF THE TEST.
!
    if (max(errx,erry)<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i10,a,0pd12.4,a)')    &
      'The elapsed time for computing the forward FFTs of two real sequences of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_fftxy
! ========================
!
end program ex1_fftxy

ex1_freq_func.F90

program ex1_freq_func
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of functions/subroutines BD_COEF
!   and FREQ_FUNC in module Time_Series_Procedures for computing the frequency response
!   of a given Lanczos linear filter.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, half, one, true, merror, allocate_error,   &
                         bd_coef, freq_func, init_fft, fft, end_fft
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=200
!
    character(len=*), parameter :: name_proc='Example 1 of freq_func'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, fch, fcl
    real(stnd), dimension(n)              :: freqr, freqr2, coef2
    real(stnd), dimension(:), allocatable :: coef
!
    complex(stnd), dimension(n)           :: coefc
!
    integer(i4b) :: k, k1, k2, pl, ph, khalf, kmid
!   
    integer :: iok
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   pl IS THE MINIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   ph IS THE MAXIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   pl AND ph ARE EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pl=6(18) and ph=32(96) SELECT
!   PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pl  = 18
    ph  = 96
!
!   COMPUTE THE CORRESPONDING CUTOFF FREQUENCIES.
!
    fch = one/real( ph, stnd )
    fcl = one/real( pl, stnd )
!
!   NOW SELECT THE OPTIMAL NUMBER OF FILTER COEFFICIENTS k FOR THE LANCZOS FILTER.
!
    k1  = ceiling( one/(half-fcl) )
    k2  = ceiling(  2.6/(fcl-fch) )
    k   = max( k1, k2, ph+1 )
!
!   CHECK IF k IS ODD.
!
    if ( (k/2)*2==k  ) k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION bd_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- BAND PASS FILTER WITH CUTOFF PERIODS PL AND PH (EG CUTOFF FREQUENCIES 1/PL AND 1/PH).
!
    coef(:k) = bd_coef( pl=pl, ph=ph, k=k )
!
!   NOW COMPUTE THE TRANSFERT FUNCTION freqr(:) OF THE LANCZOS FILTER AT THE FOURIER FREQUENCIES.
!
    call freq_func( nfreq=n, coef=coef(:k), freqr=freqr(:n), four_freq=true )
!
!   NOW, COMPUTE THE TRANSFERT FUNCTION DIRECTLY WITH THE FFT.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    coef2(:n)          = zero
    coef2(:kmid)       = coef(kmid:k)
    coef2(n-khalf+1:n) = coef(1:khalf)
!
!   FIRST INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   TRANSFORM THE TIME SERIES.
!
    coefc(:n) = cmplx( coef2(:n), zero, kind=stnd )
!
    call fft( coefc(:n), forward=true  )
!
    freqr2(:n) = real( coefc(:n),  kind=stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    call end_fft()
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE RESULTS.
!
    err = maxval(abs(freqr(:n)-freqr2(:n)))/maxval(abs(freqr(:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_freq_func
! ============================
!
end program ex1_freq_func

ex1_gchol_cmp.F90

program ex1_gchol_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines GCHOL_CMP and CHOL_SOLVE
!   in module Lin_Procedures for solving a linear system with a real symmetric semi-positive definite
!   coefficient matrix and one right hand side with a Cholesky decomposition.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, c10, true, false, gchol_cmp, &
                         chol_solve, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIC SEMI-POSITIVE DEFINITE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=n-10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of gchol_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tol, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, c
    real(stnd), dimension(:),   allocatable :: invdiag, b, b2, d, res
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n SYMMETRIC SEMI-DEFINITE POSITIVE
!               MATRIX AND ONE RIGHT HAND-SIDE WITH THE CHOLESKY DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    tol = sqrt( epsilon( err ) )
    eps = fudge*tol
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), b(n), invdiag(n), d(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM SYMMETRIC POSITIVE SEMIDEFINITE MATRIX a .
!
    call random_number( c )
!
    a = matmul( transpose(c), c )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b BELONGING TO THE RANGE OF a.
!
    call random_number( d )
!
    b = matmul( transpose(c), d )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS .
!
        allocate(  a2(n,n), b2(n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE COEFFICIENT MATRIX AND RIGHT HAND-SIDE VECTOR .
!
        a2(:n,:n) = a(:n,:n)
        b2(:n)    = b(:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR SYMMETRIC POSITIVE SEMIDEFINITE
!   SYSTEM
!
!                     a*x = b .
!
!   BY COMPUTING THE CHOLESKY DECOMPOSITION OF MATRIX a .
!   IF ON OUTPUT OF gchol_cmp d1 IS GREATER OR EQUAL TO ZERO
!   THEN THE SYMMETRIC LINEAR SYSTEM CAN BE SOLVED BY 
!   SUBROUTINE chol_solve.
!
    call gchol_cmp( a, invdiag, krank, d1, tol=tol, upper=upper )
!
    if ( d1<zero ) then
!
!       ANORMAL EXIT FROM gchol_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of GCHOL_CMP subroutine, d1=', d1
        write (prtunit,*)
!
    else
!
!       SOLVE THE SYMMETRIC LINEAR SYSTEM.
!
        call chol_solve( a, invdiag, b, upper=upper )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1>=zero .and. do_test ) then
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b2(:n) - matmul( a2, b(:n) )
        err = norm(res) / ( real(n,stnd)*( norm(a2) + norm(b2) ) )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        deallocate( a, b, c, d, invdiag, a2, b2, res )
    else
        deallocate( a, b, c, d, invdiag )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a real symmetric semi-positive definite system of size ', &
       n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_gchol_cmp
! ============================
!
end program ex1_gchol_cmp

ex1_gchol_cmp2.F90

program ex1_gchol_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GCHOL_CMP2
!   in module Lin_Procedures for computing a symmetric generalized inverse
!   of a real symmetric semi-positive definite matrix.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, c10, true, false, gchol_cmp2,  &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIC SEMI-POSITIVE DEFINITE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=n-10_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of gchol_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, tol, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, c, res
    real(stnd), dimension(:),   allocatable :: invdiag
!
    integer(i4b) :: j, krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTING A SYMMETRIC GENERALIZED INVERSE OF A REAL n-BY-n SYMMETRIC SEMI-POSITIVE
!               DEFINITE MATRIX WITH THE CHOLESKY DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    tol = sqrt( epsilon( err ) )
    eps = fudge*tol
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), ainv(n,n), invdiag(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM SYMMETRIC POSITIVE SEMIDEFINITE MATRIX a .
!
    call random_number( c )
!
    a = matmul( transpose(c), c )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE SYMMETRIC POSITIVE SEMIDEFINITE MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A GENERALIZED INVERSE OF A SYMMETRIC POSITIVE SEMIDEFINITE
!   MATRIX a BY USING THE CHOLESKY DECOMPOSITION OF a .
!
!   IF ON OUTPUT OF gchol_cmp2 d1 IS GREATER OR EQUAL TO ZERO
!   THEN THE SYMMETRIC MATRIX IS POSITIVE SEMIDEFINITE AND A
!   SYMMETRIC GENERALIZED INVERSE OF a HAS BEEN COMPUTED.
!
    call gchol_cmp2( a, invdiag, krank, d1, tol=tol, matinv=ainv, upper=upper, fill=true )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1<zero ) then
!
!       ANORMAL EXIT FROM gchol_cmp2 SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of GCHOL_CMP2 subroutine, d1=', d1
        write (prtunit,*)
!
    else if ( do_test ) then
!
!       CHECK THE IDENTITIES a*ainv*a = a AND  ainv*a*ainv = ainv ,
!       WHICH DEFINE THE GENERALIZED INVERSE OF a.
!
        res  = matmul(a2, matmul(ainv,a2)) - a2
        err1 = norm(res) / ( real(n,stnd)*norm(a2) )
!
        res = matmul(ainv, matmul(a2,ainv)) - ainv
        err2 = norm(res) / ( real(n,stnd)*norm(ainv) )
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        deallocate( a, ainv, c, invdiag, a2, res )
    else
        deallocate( a, ainv, c, invdiag )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a (generalized) inverse of a real symmetric semi-positive definite matrix of size ', &
       n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_gchol_cmp2
! =============================
!
end program ex1_gchol_cmp2

ex1_gen_random_mat.F90

program ex1_gen_random_mat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GEN_RANDOM_MAT
!   in module Random.
!                                                                            
! LATEST REVISION : 02/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, true, false, zero, c50,          &
                         unit_matrix, norm, random_seed_, random_number_, &
                         gen_random_mat, singval_sort, merror,            &
                         allocate_error
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED PSEUDO-RANDOM MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k=min(m,n)).
!
    integer(i4b), parameter :: prtunit = 6, m=105, n=50, k=min(m,n), nsvd0=50
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of gen_random_mat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, leftvec, rightvec, resid
    real(stnd), dimension(:), allocatable   :: s, resnorm
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: hous, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : GENERATION OF A RANDOM REAL MATRIX WITH A SPECIFIED SPECTRUM OF SINGULAR VALUES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY HOW THE SINGULAR VECTORS WILL BE GENERATED.
!
    hous = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), s(nsvd0), leftvec(m,nsvd0), rightvec(n,nsvd0), &
              resid(m,nsvd0), resnorm(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES FROM THE UNIFORM DISTRIBUTION.
!
    call random_number_( s(:nsvd0) )
!
!   SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
    call singval_sort( 'D', s(:nsvd0) )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( s(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH THE SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
    call gen_random_mat( s(:nsvd0), a(:m,:n), leftvec=leftvec(:m,:nsvd0), &
                         rightvec=rightvec(:n,:nsvd0), hous=hous   )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*rightvec(:n,:nsvd0) - leftvec(:m,:nsvd0)*S(:nsvd0,:nsvd0).
!
    resid(:m,:nsvd0) = matmul( a, rightvec ) - leftvec*spread(s,dim=1,ncopies=m)
    resnorm(:nsvd0) = norm( resid(:m,:nsvd0), dim=2_i4b )
!
    err1 =  maxval( resnorm(:nsvd0) )/( anorm*real(m,stnd) )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - leftvec**(t)*leftvec.
!
    call unit_matrix( a(:nsvd0,:nsvd0) )
!
    resid(:nsvd0,:nsvd0) = abs( a(:nsvd0,:nsvd0) - matmul( transpose(leftvec), leftvec ) )
!
    err2 = maxval( resid(:nsvd0,:nsvd0) )/real(m,stnd)
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - rightvec**(t)*rightvec.
!
    resid(:nsvd0,:nsvd0) = abs( a(:nsvd0,:nsvd0) - matmul( transpose(rightvec), rightvec ) )
!
    err3 = maxval( resid(:nsvd0,:nsvd0) )/real(n,stnd)
!
    err = max( err1, err2, err3 )
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, s, leftvec, rightvec, resid, resnorm )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')                           &
      'The elapsed time for generating a ', m, ' by ', n,         &
      ' pseudo-random real matrix with a specified spectrum is ', &
      elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_gen_random_mat
! =================================
!
end program ex1_gen_random_mat

ex1_gen_random_ortho_mat.F90

program ex1_gen_random_ortho_mat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GEN_RANDOM_ORTHO_MAT
!   in module Random.
!                                                                            
! LATEST REVISION : 02/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, c50,                         &
                         unit_matrix, random_seed_, random_number_,    &
                         gen_random_ortho_mat, merror, allocate_error
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!
#ifdef _TRANSPOSE
    use Utilities, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED ORTHOGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=105, n=50
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of gen_random_ortho_mat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, ulp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : GENERATION OF A REAL ORTHOGONAL MATRIX USING THE HOUSEHOLDER METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), resid(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A RANDOM m-BY-n ORTHOGONAL MATRIX H WITH FUNCTION gen_random_ortho_mat.
!
    a(:m,:n) = gen_random_ortho_mat( m, n )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - H**(t)*H.
!
    call unit_matrix( resid(:n,:n) )
!
    resid(:n,:n) = abs( resid(:n,:n) - matmul( transpose(a(:m,:n)), a(:m,:n) ) )
!
    err = maxval( resid(:n,:n) )/real(n,stnd)
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, resid )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a ', &
       m, ' by ', n,' real orthogonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_gen_random_ortho_mat
! =======================================
!
end program ex1_gen_random_ortho_mat

ex1_gen_random_sym_mat.F90

program ex1_gen_random_sym_mat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GEN_RANDOM_SYM_MAT
!   in module Random.
!                                                                            
! LATEST REVISION : 02/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, true, false, zero, c50,          &
                         unit_matrix, norm, random_seed_, random_number_, &
                         gen_random_sym_mat, eigval_sort, merror,         &
                         allocate_error
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED PSEUDO-RANDOM SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, neig0=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of gen_random_sym_mat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, eigvec, resid
    real(stnd), dimension(:), allocatable   :: s, resnorm
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: hous, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : GENERATION OF A RANDOM REAL SYMMETRIC MATRIX WITH A SPECIFIED SPECTRUM OF EIGENVALUES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY HOW THE EIGENVECTORS WILL BE GENERATED.
!
    hous = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), s(neig0), eigvec(n,neig0),       &
              resid(n,neig0), resnorm(neig0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES FROM THE UNIFORM DISTRIBUTION.
!
    call random_number_( s(:neig0) )
!
!   SORT THE EIGENVALUES BY DECREASING MAGNITUDE.
!
    call eigval_sort( 'D', s(:neig0) )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( s(:neig0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH THE SPECIFIED DISTRIBUTION
!   OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
    call gen_random_sym_mat( s(:neig0), a(:n,:n), eigvec=eigvec(:n,:neig0), hous=hous )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:n,:n)*eigvec(:n,:neig0) - eigvec(:n,:neig0)*S(:neig0,:neig0).
!
    resid(:n,:neig0) = matmul( a, eigvec ) - eigvec*spread(s,dim=1,ncopies=n)
    resnorm(:neig0) = norm( resid(:n,:neig0), dim=2_i4b )
!
    err1 =  maxval( resnorm(:neig0) )/( anorm*real(n,stnd) )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
    call unit_matrix( a(:neig0,:neig0) )
!
    resid(:neig0,:neig0) = abs( a(:neig0,:neig0) - matmul( transpose(eigvec), eigvec ) )
!
    err2 = maxval( resid(:neig0,:neig0) )/real(n,stnd)
!
    err = max( err1, err2 )
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, s, eigvec, resid, resnorm )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')                                     &
      'The elapsed time for generating a ', n, ' by ', n,                   &
      ' pseudo-random real symmetric matrix with a specified spectrum is ', &
      elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_gen_random_sym_mat
! =====================================
!
end program ex1_gen_random_sym_mat

ex1_ginv.F90

program ex1_ginv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function GINV
!   in module SVD_Procedures for computing a generalized inverse of a real matrix.
!                                                                              
!   The generalized inverse is computed with the SVD of the real matrix and the
!   computations are parallelized if OpenMP is used.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, ginv, norm, &
                         c10, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=2000, k=min(m,n)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 1 of ginv'
!   
!   
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: eps, anorm, err, err1, err2, err3, err4, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, ainv2, a_by_ainv, ainv_by_a
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTING THE GENERALIZED INVERSE OF A m-BY-n REAL MATRIX USING
!               THE SINGULAR VALUE DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*sqrt( epsilon(eps) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), ainv(n,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE GENERALIZED INVERSE OF a(:m,:n) WITH FUNCTION ginv.
!
    ainv(:n,:m) = ginv( a(:m,:n) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), ainv2(n,m), a_by_ainv(m,m),   &
                  ainv_by_a(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a )
!
!       COMPUTE ainv*a AND a*ainv*a .
!
        ainv_by_a = matmul( ainv, a )
        a2        = matmul( a, ainv_by_a )
!
!       COMPUTE a*ainv AND ainv*a*ainv .
!
        a_by_ainv = matmul( a, ainv )
        ainv2     = matmul( ainv, a_by_ainv )
!
!       CHECK THE Moore-Penrose EQUATIONS :
!
!             a*ainv*a = a            (1)
!          ainv*a*ainv = ainv         (2)
!            (a*ainv)' = a*ainv       (3)
!            (ainv*a)' = ainv*a       (4)
!
        err1 = norm( a - a2 )
        err2 = norm( ainv - ainv2 )
        err3 = norm( a_by_ainv - transpose(a_by_ainv) )
        err4 = norm( ainv_by_a - transpose(ainv_by_a) )
!
        err = max( err1, err2, err3, err4 )/ ( real(k,stnd)*anorm )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, ainv2, a_by_ainv, ainv_by_a )        
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ainv )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the generalized inverse of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_ginv
! =======================
!
end program ex1_ginv

ex1_givens_mat_left.F90

program ex1_givens_mat_left
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GIVENS_MAT_LEFT 
!   in module Giv_Procedures and subroutine triang_solve in module Lin_Procedures for
!   solving a full rank linear least squares problem by a QR factorization. The QR
!   factorization is computed by the application of a series of Givens rotations
!   on the left of the coefficient matrix of the problem.
!
!
! LATEST REVISION : 01/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, false, zero, c50, allocate_error, merror,  &
                         givens_mat_left, triang_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX AND
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=1000, np1=n+1
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of givens_mat_left'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, res
    real(stnd), allocatable, dimension(:,:) :: a, syst
!
    integer :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - a*x ||_2
!
!               USING GIVENS PLANE ROTATIONS. a IS A m-BY-n MATRIX WHICH IS ASSUMED OF FULL RANK.
!               THE RIGHT HAND SIDE b IS A m-ELEMENTS VECTOR AND THE SOLUTION VECTOR x IS A n-ELEMENTS
!               VECTOR.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), syst(m,np1), b(m), res(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   MAKE A COPY OF COEFFICIENT MATRIX a(:m,:n) .
!
    syst(:m,:n) = a(:m,:n)
!
!   MAKE A COPY OF RIGHT HAND-SIDE VECTOR b(:m) .
!
    syst(:m,np1) = b(:m)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                  a(:m,:n)*x(:n) ≈ b(:m)
!
!   FIRST TRANSFORM THE MATRIX a TO UPPER TRAPEZOIDAL FORM BY APPLYING 
!   A SERIES OF GIVENS PLANE ROTATIONS ON THE ROWS OF a FROM THE LEFT
!   AND APPLY THE ROTATIONS TO b .
!
    call givens_mat_left( syst(:m,:np1) )
!    
!   SOLVE THE n-BY-n UPPER TRIANGULAR SYSTEM.
!    
    call triang_solve( syst(:n,:n), syst(:n,np1), upper=true, trans=false ) 
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF a .
!
    res(:m) = b(:m) - matmul( a(:m,:n), syst(:n,np1) )
    err = sum(abs(matmul(res(:m)  ,a(:m,:n))) )/ sum( abs(a(:m,:n)) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, syst, b, res )
!
!
! END OF PROGRAM ex1_givens_mat_left
! ==================================
!
end program ex1_givens_mat_left

ex1_givens_mat_right.F90

program ex1_givens_mat_right
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine GIVENS_MAT_RIGHT
!   in module Giv_Procedures and subroutine triang_solve in module Lin_Procedures for
!   solving a full rank linear least squares problem by a LQ factorization. The LQ
!   factorization is computed by the application of a series of Givens rotations
!   on the right of the coefficient matrix of the problem. 
!
!
! LATEST REVISION : 01/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, false, zero, c50, allocate_error, merror,  &
                         givens_mat_right, triang_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! n IS THE NUMBER OF ROWS OF THE RANDOM MATRIX AND
! m IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=1000, np1=n+1, m=5000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of givens_mat_right'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, res
    real(stnd), allocatable, dimension(:,:) :: a, syst
!
    integer :: iok, istart, iend, irate, imax, itime
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A n-BY-m REAL COEFFICIENT
!               MATRIX USING A LQ DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - x*a ||_2
!
!               USING GIVENS PLANE ROTATIONS APPLY FROM THE RIGHT. a IS A n-BY-m MATRIX WHICH IS ASSUMED OF
!               FULL RANK. THE RIGHT HAND SIDE b IS A m-ELEMENTS VECTOR AND THE SOLUTION VECTOR x IS A
!               n-ELEMENTS VECTOR.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), syst(np1,m), b(m), res(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:n,:m) .
!
    call random_number( a(:n,:m) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   MAKE A COPY OF COEFFICIENT MATRIX a(:n,:m) .
!
    syst(:n,:m) = a(:n,:m)
!
!   MAKE A COPY OF RIGHT HAND-SIDE VECTOR b(:m) .
!
    syst(np1,:m) = b(:m)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                  x(:n)*a(:n,:m) ≈ b(:m)
!
!   FIRST TRANSFORM THE MATRIX a TO LOWER TRAPEZOIDAL FORM BY APPLYING 
!   A SERIES OF GIVENS PLANE ROTATIONS ON THE COLUMNS OF a FROM THE RIGHT
!   AND APPLY THE ROTATIONS TO b .
!
    call givens_mat_right( syst(:np1,:m) )
!    
!   SOLVE THE n-BY-n LOWER TRIANGULAR SYSTEM.
!    
    call triang_solve( syst(:n,:n), syst(np1,:n), upper=false, trans=true ) 
!    
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF a' .
!
    res(:m) = b(:m) - matmul( syst(np1,:n), a(:n,:m) )
    err = sum(abs(matmul(a(:n,:m),res(:m))) )/ sum( abs(a(:n,:m)) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       n, ' by ', m,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, syst, b, res )
!
!
! END OF PROGRAM ex1_givens_mat_right
! ===================================
!
end program ex1_givens_mat_right

ex1_h1.F90

program ex1_h1
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of H1 and APPLY_H1
!   in module Hous_Procedures. 
!                                                                              
! LATEST REVISION : 19/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, safmin, machhuge, h1, apply_h1, &
                         triang_solve, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!
    character(len=*), parameter :: name_proc='Example 1 of h1'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, eps, tau, beta, tmp, d1, normx, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:),  allocatable  :: b, x, res
!
    integer(i4b) :: i
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE QR DECOMPOSITION.
!               THE QR DECOMPOSITION IS COMPUTED BY THE HOUSEHOLDER
!               METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!         a(:n,:n)*x(:n) = b(:n) .
!    
!   TRANSFORM THE MATRIX a TO UPPER TRIANGULAR FORM BY APPLYING 
!   A SERIE OF HOUSEHOLDER REFLECTIONS ON THE COLUMNS OF a AND APPLY
!   THE TRANSFORMATIONS TO b .
!
    d1 = machhuge
!
    do i = 1_i4b, n
!
        call h1( a(i:n,i), beta, tau )
        call apply_h1( a(i:n,i), tau, a(i:n,i+1_i4b:n), left=true )
        call apply_h1( a(i:n,i), tau, b(i:n) )
!
        a(i,i) = beta
!
        d1 = min( d1, abs(beta) )
!
    end do
!    
    if ( d1>safmin ) then
!    
!       SOLVE THE n-BY-n RESULTING UPPER TRIANGULAR SYSTEM IF THE LINEAR SYSTEM
!       IS NOT SINGULAR WITH SUBROUTINE triang_solve .
!    
        call triang_solve( a(:n,:n), b(:n) )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1>safmin .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>safmin ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_h1
! =====================
!
end program ex1_h1

ex1_h2.F90

program ex1_h2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of H2 and APPLY_H2
!   in module Hous_Procedures. 
!                                                                              
! LATEST REVISION : 18/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, safmin, machhuge, h2, apply_h2, &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=1000
!
    character(len=*), parameter :: name_proc='Example 1 of h2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, eps, tau, up, tmp, d1, normx, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:),  allocatable  :: b, x, res
!
    integer(i4b) :: i
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE QR DECOMPOSITION.
!               THE QR DECOMPOSITION IS COMPUTED BY THE HOUSEHOLDER
!               METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!         a(:n,:n)*x(:n) = b(:n) .
!    
!   TRANSFORM THE MATRIX a TO UPPER TRIANGULAR FORM BY APPLYING 
!   A SERIE OF HOUSEHOLDER REFLECTIONS ON THE COLUMNS OF a AND APPLY
!   THE TRANSFORMATIONS TO b .
!
    d1 = machhuge
!
    do i = 1_i4b, n
!
        call h2( a(i,i), a(i+1_i4b:n,i), up, tau )
        call apply_h2( a(i+1_i4b:n,i), up, tau,    &
                       a(i,i+1_i4b:n), a(i+1_i4b:n,i+1_i4b:n), left=true )
        call apply_h2( a(i+1_i4b:n,i), up, tau, b(i), b(i+1_i4b:n) )
!
        d1 = min( d1, abs(a(i,i)) )
!
    end do
!    
    if ( d1>safmin ) then
!    
!       SOLVE THE n-BY-n RESULTING UPPER TRIANGULAR SYSTEM IF THE LINEAR SYSTEM
!       IS NOT SINGULAR.
!    
        do i = n, 1_i4b, -1_i4b
!    
            b(i) = b(i)/a(i,i)
            b(1_i4b:i-1_i4b) = b(1_i4b:i-1_i4b) - b(i)*a(1_i4b:i-1_i4b,i)
!    
        end do
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1>safmin .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>safmin ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_h2
! =====================
!
end program ex1_h2

ex1_hous1.F90

program ex1_hous1
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of HOUS1 and APPLY_HOUS1
!   in module Hous_Procedures. 
!                                                                              
! LATEST REVISION : 20/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, safmin, machhuge,      &
                         triang_solve, hous1, apply_hous1,  norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=1000
!
    character(len=*), parameter :: name_proc='Example 1 of hous1'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, eps, tau, beta, tmp, d1, normx, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:),  allocatable  :: b, x, res
!
    integer(i4b) :: i
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE QR DECOMPOSITION.
!               THE QR DECOMPOSITION IS COMPUTED BY THE HOUSEHOLDER
!               METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!         a(:n,:n)*x(:n) = b(:n) .
!    
!   TRANSFORM THE MATRIX a TO UPPER TRIANGULAR FORM BY APPLYING 
!   A SERIE OF HOUSEHOLDER REFLECTIONS ON THE COLUMNS OF a AND APPLY
!   THE TRANSFORMATIONS TO b .
!
    d1 = machhuge
!
    do i = 1_i4b, n
!
        call hous1( a(i:n,i), tau, beta )
        call apply_hous1( a(i:n,i), tau, a(i:n,i+1_i4b:n), left=true )
        call apply_hous1( a(i:n,i), tau, b(i:n) )
!
        a(i,i) = beta
!
        d1 = min( d1, abs(beta) )
!
    end do
!    
    if ( d1>safmin ) then
!    
!       SOLVE THE n-BY-n RESULTING UPPER TRIANGULAR SYSTEM IF THE LINEAR SYSTEM
!       IS NOT SINGULAR WITH SUBROUTINE triang_solve .
!    
        call triang_solve( a(:n,:n), b(:n) )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1>safmin .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>safmin ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_hous1
! ========================
!
end program ex1_hous1

ex1_hous2.F90

program ex1_hous2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of HOUS2 and APPLY_HOUS2
!   in module Hous_Procedures. 
!                                                                              
! LATEST REVISION : 18/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, safmin, machhuge, &
                         hous2, apply_hous2,  norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=1000
!
    character(len=*), parameter :: name_proc='Example 1 of hous2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, eps, tau, tmp, d1, normx, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:),  allocatable  :: b, x, res
!
    integer(i4b) :: i
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE QR DECOMPOSITION.
!               THE QR DECOMPOSITION IS COMPUTED BY THE HOUSEHOLDER
!               METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!         a(:n,:n)*x(:n) = b(:n) .
!    
!   TRANSFORM THE MATRIX a TO UPPER TRIANGULAR FORM BY APPLYING 
!   A SERIE OF HOUSEHOLDER REFLECTIONS ON THE COLUMNS OF a AND APPLY
!   THE TRANSFORMATIONS TO b .
!
    d1 = machhuge
!
    do i = 1_i4b, n
!
        call hous2( a(i,i), a(i+1_i4b:n,i), tau )
        call apply_hous2( a(i+1_i4b:n,i), tau, a(i,i+1_i4b:n), a(i+1_i4b:n,i+1_i4b:n), left=true )
        call apply_hous2( a(i+1_i4b:n,i), tau, b(i), b(i+1_i4b:n) )
!
        d1 = min( d1, abs(a(i,i)) )
!
    end do
!    
    if ( d1>safmin ) then
!    
!       SOLVE THE n-BY-n RESULTING UPPER TRIANGULAR SYSTEM IF THE LINEAR SYSTEM
!       IS NOT SINGULAR.
!    
        do i = n, 1_i4b, -1_i4b
!    
            b(i) = b(i)/a(i,i)
            b(1_i4b:i-1_i4b) = b(1_i4b:i-1_i4b) - b(i)*a(1_i4b:i-1_i4b,i)
!    
        end do
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1>safmin .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>safmin ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_hous2
! ========================
!
end program ex1_hous2

ex1_hp_coef.F90

program ex1_hp_coef
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions HP_COEF
!   and SYMLIN_FILTER in module Time_Series_Procedures for high-pass filtering a time
!   series with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, half, one, two, pi, true, false, merror, allocate_error,   &
                         arth, hp_coef, symlin_filter, init_fft, fft, end_fft
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=2000
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, tmp, fch
    real(stnd), dimension(n)              :: y, y2, y3, freq, freqr, tmpvec
    real(stnd), dimension(:), allocatable :: coef
!
    complex(stnd), dimension(n)           :: yc
!
    integer(i4b) :: i, k, k1, k2, ph, nfilt, khalf, kmid
!   
    integer :: iok
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of hp_coef'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
!
!   ph IS THE MAXIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   ph IS EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pc==32(96) SELECT
!   A PERIOD OF 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    ph  = 32
!
!   COMPUTE THE CORRESPONDING CUTOFF FREQUENCY fch .
!
    fch = one/real( ph, stnd )
!
!   NOW SELECT THE OPTIMAL NUMBER OF FILTER COEFFICIENTS k FOR THE LANCZOS FILTER.
!
    i = ceiling( one/(half-fch) )
    k = max( i, ph+1 )
!
!   CHECK IF k IS ODD.
!
    if ( (k/2)*2==k  ) k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION hp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- HIGH-PASS FILTER WITH CUTOFF PERIODS PH (E.G. CUTOFF FREQUENCY 1/PH).
!
    coef(:k) = hp_coef( ph=ph, k=k )
!
!   SUBROUTINE symlin_filter PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter FILTERS THE TIME SERIES .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE LOST FROM EACH END OF THE SERIES SO THAT
!   NFILT (NFILT= size(VEC) - size(COEF) + 1) TIME OBSERVATIONS ARE RETURNED IN VEC(:NFILT)
!   AND THE REMAINING OBSERVATIONS ARE SET TO ZERO.
!
    call symlin_filter( vec=y2(:n), coef=coef(:k),  nfilt=nfilt )
!
!   NOW COMPUTE THE TRANSFERT FUNCTION freqr(:) OF THE LANCZOS FILTER AT THE FOURIER FREQUENCIES.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    freqr(:n) = coef(kmid)
!
    tmp        = (two*pi)/real( n, stnd )
    freq(:n)   = arth( zero, tmp, n )
    tmpvec(:n) = zero
!
    do i = 1, khalf
        tmpvec(:n) = tmpvec(:n) + freq(:n)
        freqr(:n)  = freqr(:n)  + two*coef(kmid+i)*cos( tmpvec(:n) )
    end do
!
!   NOW, APPLY THE FILTER USING THE FFT AND THE CONVOLUTION THEOREM.
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   TRANSFORM THE TIME SERIES.
!
    yc(:n) = cmplx( y(1:n), zero, kind=stnd )
!
    call fft( yc(:n), forward=true  )
!
!   MULTIPLY THE FOURIER TRANSFORM OF THE TIME SERIES
!   BY THE TRANSFERT FUNCTION OF THE FILTER.
!
    yc(:n) = yc(:n)*freqr(:n)
!
!   INVERT THE SEQUENCE BACK TO GET THE FILTERED TIME SERIES.
!
    call fft( yc(:n), forward=false )
!
    y3(:n) = real( yc(:n),  kind=stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    call end_fft()
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(:nfilt)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_hp_coef
! ==========================
!
end program ex1_hp_coef

ex1_hp_coef2.F90

program ex1_hp_coef2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions HP_COEF2
!   and SYMLIN_FILTER2 in module Time_Series_Procedures for high-pass filtering a time
!   series with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, merror, allocate_error,   &
                         hp_coef2, symlin_filter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES .
!
    integer(i4b), parameter :: prtunit=6, n=2001
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err
    real(stnd), dimension(n)              :: y, y2, y3
    real(stnd), dimension(:), allocatable :: coef
!
    integer(i4b) :: k, k1, k2, ph, khalf, kmid
!   
    integer :: iok
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of hp_coef2'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
!   ph IS THE MAXIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   ph IS EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. ph==32(96) SELECT
!   A PERIOD OF 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    ph  = 32
!
!   NOW SELECT THE NUMBER OF FILTER COEFFICIENTS k FOR THE HAMMING FILTER.
!
    k = ph + 1
!
!   CHECK IF k IS ODD.
!
    if ( (k/2)*2==k )  k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION hp_coef2 COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- HIGH-PASS FILTER WITH CUTOFF PERIODS PH (EG CUTOFF FREQUENCY 1/PH).
!
    coef(:k) = hp_coef2( ph=ph, k=k )
!
!   SUBROUTINE symlin_filter2 PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter2 FILTERS THE TIME SERIES .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE AFFECTED BY END EFFECTS  FROM EACH END OF THE SERIES.
!
    call symlin_filter2( vec=y2(:n), coef=coef(:k) )
!
!   FILTER AGAIN THE TIME SERIES, BUT COMPUTE THE VALUES AT THE ENDS OF THE OUTPUT
!   BY ASSUMING THAT THE INPUT IS PART OF A PERIODIC SEQUENCE OF PERIOD n .
!
    call symlin_filter2( vec=y3(:n), coef=coef(:k), usefft=true )
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(k1:k2)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_hp_coef2
! ===========================
!
end program ex1_hp_coef2

ex1_hwfilter.F90

program ex1_hwfilter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine HWFILTER
!   in module Time_Series_Procedures for filtering a time series in
!   a specific frequency band.
!                                                                              
!                                                                              
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, hwfilter
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=5000
!
    character(len=*), parameter :: name_proc='Example 1 of hwfilter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)               :: err, win
    real(stnd), dimension(n) :: y, y2, y3
!
    integer(i4b) :: minp, maxp
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of hwfilter'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
    minp  = 18_i4b
    maxp  = 96_i4b
!
!   BY DEFAULT, HAMMMING WINDOW FILTERING IS USED (i.e. win=0.54).
!   SET win=0.5 for HANNING WINDOW OR win=1 FOR RECTANGULAR WINDOW.
!   IN ANY CASE, win MUST BE GREATER OR EQUAL TO 0.5 AND LESS OR EQUAL TO 1.
!
    win = one
!
!   hwfilter FILTERS A TIME SERIES (THE ARGUMENT VEC) IN THE FREQUENCY BAND
!   LIMITED BY PERIODS PL AND PH BY WINDOWED FILTERING (PL AND PH ARE EXPRESSED IN NUMBER OF
!   POINTS, i.e. PL=18 AND PH=96 SELECTS PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR MONTHLY DATA).
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS BETWEEN minp AND maxp .
!
    call hwfilter( vec=y2(:n), pl=minp, ph=maxp, win=win )
!
!   SETTING PH<PL IS ALLOWED AND PERFORMS BAND REJECTION OF PERIODS BETWEEN PH AND PL.
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS OUTSIDE THE BAND BETWEEN minp AND maxp .
!
    call hwfilter( vec=y3(:n), pl=maxp, ph=minp, win=win )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE TWO FILTERED TIME SERIES.
!
    y2(:n) = y2(:n) + y3(:n)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    err = maxval(abs(y(:n)-y2(:n)))/maxval(abs(y(:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_hwfilter
! ===========================
!
end program ex1_hwfilter

ex1_hwfilter2.F90

program ex1_hwfilter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine HWFILTER2
!   in module Time_Series_Procedures for filtering a time series in
!   a specific frequency band.
!                                                                              
!                                                                              
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, hwfilter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=20000
!
    character(len=*), parameter :: name_proc='Example 1 of hwfilter2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)               :: err, win
    real(stnd), dimension(n) :: y, y2, y3
!
    integer(i4b) :: minp, maxp
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
    minp  = 18_i4b
    maxp  = 96_i4b
!
!   BY DEFAULT, HAMMMING WINDOW FILTERING IS USED (i.e. win=0.54).
!   SET win=0.5 for HANNING WINDOW OR win=1 FOR RECTANGULAR WINDOW.
!   IN ANY CASE, win MUST BE GREATER OR EQUAL TO 0.5 AND LESS OR EQUAL TO 1.
!
    win = one
!
!   hwfilter FILTERS A TIME SERIES (THE ARGUMENT VEC) IN THE FREQUENCY BAND
!   LIMITED BY PERIODS PL AND PH BY WINDOWED FILTERING (PL AND PH ARE EXPRESSED IN NUMBER OF
!   POINTS, i.e. PL=18 AND PH=96 SELECTS PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR MONTHLY DATA).
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS BETWEEN minp AND maxp .
!
    call hwfilter2( vec=y2(:n), pl=minp, ph=maxp, win=win )
!
!   SETTING PH<PL IS ALLOWED AND PERFORMS BAND REJECTION OF PERIODS BETWEEN PH AND PL.
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS OUTSIDE THE BAND BETWEEN minp AND maxp .
!
    call hwfilter2( vec=y3(:n), pl=maxp, ph=minp, win=win )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE TWO FILTERED TIME SERIES.
!
    y2(:n) = y2(:n) + y3(:n)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    err = maxval(abs(y(:n)-y2(:n)))/maxval(abs(y(:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_hwfilter2
! ============================
!
end program ex1_hwfilter2

ex1_id_cmp.F90

program ex1_id_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines ID_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures for computing a
!   (deterministic or randomized) column interpolative decomposition.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, id_cmp,  &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat,       &
                         random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX;
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX;
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX;
! nid IS THE TARGET RANK OF THE COLUMN INTERPOLATIVE DECOMPOSITION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nid=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of id_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err4, tmp, norma, normr, &
                                               eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, t, c, v, resid
!
    integer(i4b)                            :: i, nt, blk_size, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, random_qr
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A (RANDOMIZED OR DETERMINISTIC) COLUMN INTERPOLATIVE
!               DECOMPOSITION (ID) OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE RESULTS OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
    err = zero
!
!   DETERMINE PARAMETERS OF THE COLUMN INTERPOLATIVE DECOMPOSITION (ID) ALGORITHM.
!
!   SET TOLERANCE FOR CHECKING THE RANK OF THE PARTIAL ID APPROXIMATION IN THE SUBROUTINE.
!
    tol = eps
!
!   SPECIFY IF A RANDOMIZED OR DETERMINISTIC COLUMN ID ALGORITHM IS USED.
!
    random_qr = true
!
!   DETERMINE THE BLOCKSIZE PARAMETER IN THE RANDOMIZED COLUMN ID ALGORITHM.
!
    blk_size = 20_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER IN THE RANDOMIZED COLUMN ID ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    nt = n - nid
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,i), diagr(nid), beta(nid), ip(n), singval0(nsvd0),  &
                  t(nid,nt), c(m,nid), v(nid,n), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nid), beta(nid), ip(n), singval0(nsvd0),  &
                  t(nid,nt), c(m,nid), v(nid,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   SAVE THE INPUT MATRIX FOR LATER USE IF REQUIRED.
!
    if ( do_test ) then
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A (RANDOMIZED OR DETERMINISTIC) COLUMN ID DECOMPOSITION OF A DATA MATRIX a
!   WITH SUBROUTINE id_cmp. THE RANK OF THE COLUMN ID DECOMPOSITION IS
!   DETERMINED BY THE NUMBER OF ROWS OF THE ARRAY ARGUMENT t, nid = size(t,1) .
!
    call id_cmp( a(:m,:n), ip(:n), t(:nid,:nt), c=c(:m,:nid), v=v(:nid,:n), &
                 diagr=diagr(:nid), beta=beta(:nid), rnorm=normr, tol=tol,  &
                 random_qr=random_qr, blk_size=blk_size, nover=nover )
!
!   THE ROUTINE COMPUTES A (RANDOMIZED OR DETERMINISTIC) COLUMN ID DECOMPOSITION OF a AS:
!
!                     a ≈ c * v = c * [ I  t ] * P'
!
!   WHERE c IS A m-BY-nid MATRIX, WHICH CONSISTS OF A SUBSET OF nid COLUMNS OF a,
!   v IS A nid-BY-n MATRIX, I IS THE IDENTITY MATRIX OF ORDER nid, t IS A nid-BY-(n-nid)
!   MATRIX AND P IS A n-BY-n PERMUTATION MATRIX. THE c AND v MATRICES ARE ESTIMATED
!   TO MINIMIZE THE ERROR OF THE COLUMN ID DECOMPOSITION.
!
!   SUCH COLUMN ID DECOMPOSITION CAN BE COMPUTED EFFICIENTLY WITH THE HELP OF A (RANDOMIZED
!   OR DETERMINISTIC) PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING OF a, WHICH IS DEFINED AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE Q IS A m-BY-nid MATRIX WITH ORTHOGONAL COLUMNS, R IS A nid-BY-n UPPER OR TRAPEZOIDAL MATRIX
!   AND R11 IS A nid-BY-nid UPPER TRIANGULAR MATRIX.
!
!   THE MATRIX c AND THE SUBMATRIX t IN THE COLUMN ID DECOMPOSITION OF a CAN BE COMPUTED AS:
!
!                     c = Q * R11      AND      t = inv(R11) * R12
!
!   AND THE FROBENIUS NORM OF THE ERROR OF THIS COLUMN ID DECOMPOSITION OF a IS THE SAME
!   AS THAT OF THIS PARTIAL QR DECOMPOSITION OF a.
!
!   ON EXIT OF id_cmp, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nid) AND THE ARRAY
!     beta(:nid) STORES Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX p IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ THE CALCULATIONS TO DETERMINE THE
!       CONDITION NUMBER OF R11 ARE NOT PERFORMED AND THE RANK OF R11 IS ASSUMED TO
!       BE EQUAL TO nid.
!
!   THE SUBROUTINE WILL EXIT WITH AN ERROR MESSAGE IF THE RANK OF R11 IS LESS THAN nid.
!
!   IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS) AND THAT R11 IS SINGULAR, THEN THE LINEARLY
!   DEPENDENT COLUMNS CAN USUALLY BE EXCLUDED FROM THE QR (AND ID) APPROXIMATION AND THE
!   RANK OF R11 CAN BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE RELATIVE ERROR OF THE COLUMN ID APPROXIMATION.
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       CHECK ACCURACY OF THE FROBENIUS NORM OF THE RESIDUAL MATRIX.
!
        resid(:m,:n) = resid(:m,:n) - matmul( c(:m,:nid), v(:nid,:n) )
!
        if ( normr<=one ) then
!
            err2 = abs( norm( resid(:m,:n) ) - normr )
!
        else
!
            err2 = abs( norm( resid(:m,:n) )/normr - one )
!
        end if
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
        call ortho_gen_qr( a(:m,:m), beta(:nid) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF nid ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(nid)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION c(:m,:nid) - Q(:m,:nid)*(Q(:m,:nid)'*c(:m,:nid)).
!
        t(:nid,:nid) = matmul( transpose(a(:m,:nid)), c(:m,:nid) )
!
        resid(:m,:nid) = abs( c(:m,:nid) - matmul( a(:m,:nid), t(:nid,:nid) ) )
!
        err3 = maxval( resid(:m,:nid) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
!        call unit_matrix( resid(:m,:m) )
!
!        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
!        err3 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF c(:m,:nid) AND ITS ORTHOGONAL COMPLEMENT Q(:m,nid+1:m).
!
        if ( m>nid ) then
!
            resid(:nid,nid+1_i4b:m) = matmul( transpose(c(:m,:nid)), a(:m,nid+1_i4b:m) )
!
            err4 = maxval( abs( resid(:nid,nid+1_i4b:m) ) )/real(m,stnd)
!
        else
!
            err4 = zero
!
        end if
!
        err = max( err2, err3, err4 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip, singval0, c, v, t )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the column ID approximation      &
                       &                                        = ', nid
!        
    write (prtunit,*) 'Relative error of the column ID decomposition  &
                       &||A - C*V||_F/||A||_F             = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the column ID &
                          &approximation                             = ', err3
!
        if ( m>nid ) then
            write (prtunit,*) 'Orthogonality of the range of the ID approximation&
                               & and its orthogonal complement = ', err4
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a (randomized) column ID decomposition of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_id_cmp
! =========================
!
end program ex1_id_cmp

ex1_inv.F90

program ex1_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of fonction INV
!   in module Lin_Procedures for computing the inverse of a real matrix.
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, is_nan, zero, one, inv, &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT AND n IS THE DIMENSION OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000
!
    character(len=*), parameter :: name_proc='Example 1 of inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ainv, res
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure
!
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE INVERSE OF A REAL n-BY-n MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), ainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX a .
!
    call random_number( a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE MATRIX INVERSE WITH FUNCTION inv.
!
    ainv = inv( a ) 
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE MATRIX IS SINGULAR.
!
    failure = is_nan( ainv )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM inv FUNCTION, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of INV function, input matrix is singular'
        write (prtunit,*)
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( a, ainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a(:n,:n)) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( ainv, a )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a real matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_inv
! ======================
!
end program ex1_inv

ex1_leapyr.F90

program ex1_leapyr
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function LEAPYR
!   in module Time_Procedures for determining if a given year is a leap year or not.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, leapyr
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!
    character(len=*), parameter :: name_proc='Example 1 of leapyr'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr
!   
    logical(lgl) :: is_leapyr
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A YEAR.
!
    iyr  = 1902
!
!   CHECK IF iyr IS A LEAP YEAR. 
!
    is_leapyr = leapyr( iyr )
!
!   FUNCTION leapyr IS RETURNED AS "true"
!   IF iyr IS A LEAP YEAR, AND "false" OTHERWISE.
!
!   LEAP YEARS ARE YEARS THAT ARE EVENLY DIVISIBLE BY 4, EXCEPT YEARS
!   THAT ARE EVENLY DIVISIBLE BY 100 MUST BE DIVISIBLE BY 400.
!   GREGORIAN CALENDAR ADOPTED OCT. 15, 1582.
!
!   PRINT THE RESULT.
!
    if ( is_leapyr ) then
        write (prtunit,*) 'The year ', iyr,' is a leap year'
    else
        write (prtunit,*) 'The year ', iyr,' is not a leap year'
    end if
!
!
! END OF PROGRAM ex1_leapyr
! =========================
!
end program ex1_leapyr

ex1_lin_lu_solve.F90

program ex1_lin_lu_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LIN_LU_SOLVE
!   in module Lin_Procedures for solving a real linear system by a LU decomposition
!   with partial pivoting and implicit row scaling.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, safmin, zero, half, lin_lu_solve, &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=5000
!
    character(len=*), parameter :: name_proc='Example 1 of lin_lu_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:), allocatable   :: b, x, res
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, do_test
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a. IF ON OUTPUT OF lin_lu_solve
!   failure IS SET TO FALSE THEN THE LINEAR SYSTEM IS NOT SINGULAR
!   AND THE SOLUTION VECTOR HAS BEEN COMPUTED.
!
    call lin_lu_solve( a, b, failure )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM lin_lu_solve SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to LIN_LU_SOLVE subroutine, failure=', failure
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY .
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_lin_lu_solve
! ===============================
!
end program ex1_lin_lu_solve

ex1_llsq_qr_solve.F90

program ex1_llsq_qr_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE2
!   in module LLSQ_Procedures for solving linear least squares problems with a
!   QR decomposition (with column pivoting) or a complete orthogonal factorization
!   of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, &
                         merror, llsq_qr_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=8000, n=4000
!
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of llsq_qr_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: x, resid, b
    real(stnd), allocatable, dimension(:,:) :: a
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, min_norm
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION WITH COLUMN PIVOTING OR A COMPLETE ORTHOGONAL
!               DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), resid(m), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM BY A QR DECOMPOSITION WITH COLUMN PIVOTING
!   USING SUBROUTINE llsq_qr_solve.
!
    call llsq_qr_solve( a(:m,:n), b(:m), x(:n), resid=resid(:m),  &
                        krank=krank, tol=tol, min_norm=min_norm   )
!
!   llsq_qr_solve COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES.
!
!   a AND b ARE NOT OVERWRITTEN BY llsq_qr_solve. THE OPTIONAL ARGUMENTS krank,
!   tol AND min_norm MUST NOT BE PRESENT IF a IS A m-ELEMENTS VECTOR.
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!   
!   BY DEFAULT, PIVOTING IS DONE ON ALL THE COLUMNS OF a.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED,
!   krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER resid IS PRESENT,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND ON EXIT
!
!                               resid = b - a*x .
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve .
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true IN THE CALL TO llsq_qr_solve. OTHERWISE, SOLUTION(S) ARE COMPUTED
!   SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        err = maxval( abs( matmul( resid, a ) ) )/ sum( abs(a) )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, resid, x )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_llsq_qr_solve
! ================================
!
end program ex1_llsq_qr_solve

ex1_llsq_qr_solve2.F90

program ex1_llsq_qr_solve2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE2
!   in module LLSQ_Procedures for solving linear least squares problems with a
!   QR decomposition (with column pivoting) or a complete orthogonal factorization
!   of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, &
                         merror, llsq_qr_solve2
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=8000, n=4000, mn=min( m, n )
!
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of llsq_qr_solve2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: x, b
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b)                            :: krank, j, l, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, min_norm, do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION WITH COLUMN PIVOTING OR A COMPLETE ORTHOGONAL
!               DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE RESIUDALS MUST BE COMPUTED.
!
    comp_resid = true
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(min(m,n)-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM BY A QR DECOMPOSITION WITH COLUMN PIVOTING
!   USING SUBROUTINE llsq_qr_solve2.
!
    call llsq_qr_solve2( a(:m,:n), b(:m), x(:n), comp_resid=comp_resid,     &
                         krank=krank, tol=tol, min_norm=min_norm, ip=ip(:n) )
!
!   llsq_qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES.
!
!   a AND b ARE OVERWRITTEN BY llsq_qr_solve2. THE OPTIONAL ARGUMENTS krank,
!   tol AND min_norm MUST NOT BE PRESENT IF a IS A m-ELEMENTS VECTOR.
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!   
!   BY DEFAULT, PIVOTING IS DONE ON ALL THE COLUMNS OF a.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED,
!   krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!   
!   ON EXIT, IF THE OPTIONAL INTEGER ARRAY ip IS PRESENT, ip STORES THE PERMUTATION MATRIX
!   P IN THE QR OR COMPLETE DECOMPOSITION OF a.
!   IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!   THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!   IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND IS OUTPUT IN b .
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve2 .
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true. OTHERWISE, SOLUTION(S) ARE COMPUTED SUCH THAT IF THE jTH COLUMN
!   OF a IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        err = maxval( abs( matmul( b, a2 ) ) )/ sum( abs(a2) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( a2 )
!
    end if
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, ip )
!
!
! END OF PROGRAM ex1_llsq_qr_solve2
! =================================
!
end program ex1_llsq_qr_solve2

ex1_llsq_svd_solve.F90

program ex1_llsq_svd_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_SVD_SOLVE
!   in module LLSQ_Procedures for solving linear least squares problems using an
!   SVD decomposition of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c100, lamch, norm,        &
                         print_array, llsq_svd_solve, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=8000, n=6000, mn=min(m,n)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 1 of llsq_svd_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, ulp, eps, tol, anorm, rnorm, bnorm, cond, sfmin, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, b2, res, res2, x, sing_values
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b) :: krank, j
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, do_test, do_print
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVE A LINEAR LEAST SQUARES SYSTEM WITH ONE RIGHT HAND-SIDE
!               BY THE SINGULAR VALUE DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE
!   AND IF DETAILED RESULTS MUST BE PRINTED.
!
    do_test  = true
    do_print = false
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), sing_values(mn), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n REAL RANDOM COEFFICIENT MATRIX a .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    j = min( n, 5_i4b ) 
    a(:m,j) = a(:m,1_i4b) + a(:m,2_i4b)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b .
!
    call random_number( b )
!
!   COMPUTE THE NORM OF DEPENDENT VARIABLE b .
!
    bnorm = norm( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), b2(m), res(m), res2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
!       SAVE RIGHT HAND SIDE VECTOR.
!
        b2(:m) = b(:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   llsq_svd_solve COMPUTES THE MINIMUM NORM SOLUTION TO A REAL LINEAR LEAST
!   SQUARES PROBLEM :
!
!                       Minimize || b - a*x ||_2
!
!   USING THE SINGULAR VALUE DECOMPOSITION (SVD) OF a. A IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. b AND x CAN BE VECTORS OF MATRICES, BUT THEIR
!   SHAPES MUST BE CONFORMABLE WITH THE SHAPE OF a.
!
!   IN OTHER WORDS, IF b AND x ARE MATRICES, SEVERAL RIGHT HAND SIDE VECTORS b
!   AND SOLUTION VECTORS x CAN BE HANDLED IN A SINGLE CALL; THEY ARE STORED AS
!   THE COLUMNS OF THE m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX b, RESPECTIVELY.
!
!   THE EFFECTIVE RANK OF a, krank,IS DETERMINED BY TREATING AS ZERO THOSE
!   SINGULAR VALUES WHICH ARE LESS THAN tol TIMES THE LARGEST SINGULAR VALUE.
!
    call llsq_svd_solve( a, b, failure, x,                                            &
                         singvalues=sing_values, krank=krank, rnorm=rnorm, tol=tol )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUAL VECTOR IS ORTHOGONAL TO THE RANGE OF a .
!
        res(:m)  = b2(:m) - matmul( a2(:m,:n), x(:n) )
        res2(:n) = matmul( res(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res2(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( res(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, b2, res, res2 )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!   GET MACHINE CONSTANT sfmin, SUCH THAT 1/sfmin DOES NOT OVERFLOW.
!
    sfmin = lamch( 's' )
!
!   COMPUTE THE CONDITION NUMBER OF a(:m,:n) IN THE 2-NORM
!
!            singvalues(1)/singvalues(min(m,n)) .
!
    if ( sing_values(mn)/sing_values(1_i4b)<=sfmin ) then
        cond = huge( cond )
    else
        cond = sing_values(1_i4b)/sing_values(mn)
    end if
!
!   PRINT RESULTS .
!
    write (prtunit,*)
    write (prtunit,*)
    write (prtunit,*) 'Least squares solution via Singular Value Decomposition'
    write (prtunit,*)
    write (prtunit,*) '    min of ||a(:,:)*x(:)-b(:)||**2 for vector x(:)     '
    write (prtunit,*)
    write (prtunit,*) 'Tolerance for zero singular values (tol*sing_values(1)):',tol*sing_values(1)
    write (prtunit,*)
    write (prtunit,*) 'Condition number (in the 2-norm) of a :',cond
    write (prtunit,*) 'Rank of a                             :',krank
    write (prtunit,*)
    write (prtunit,*) 'Residual sum of squares     ||a*x-b||**2          :',rnorm**2
    write (prtunit,*) 'Residual sum of squares (%) ||a*x-b||**2/||b||**2 :',(rnorm/bnorm)**2
    write (prtunit,*)
!
    if ( do_print ) then
!
!       PRINT DETAILED RESULTS.
!
        call print_array( sing_values, title=' Singular values of a ' )
!
        write (prtunit,*)
!
        call print_array( x, title=' Least squares solution vector x ' )
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, sing_values )
!
!
! END OF PROGRAM ex1_llsq_svd_solve
! =================================
!
end program ex1_llsq_svd_solve

ex1_llsq_svd_solve2.F90

program ex1_llsq_svd_solve2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_SVD_SOLVE2
!   in module LLSQ_Procedures for solving linear least squares problems using the
!   Rhala-Barlow one-sided bidiagonal reduction algorithm and an SVD decomposition
!   of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c200, lamch, norm, &
                         print_array, llsq_svd_solve2, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! m MUST BE GREATER THAN n FOR USING llsq_svd_solve2 SUBROUTINE.
!
    integer(i4b), parameter :: prtunit=6, m=8000, n=6000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c200
!
    character(len=*), parameter :: name_proc='Example 1 of llsq_svd_solve2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, ulp, eps, tol, anorm, rnorm, bnorm, cond, sfmin, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, b2, res, res2, x, sing_values
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b) :: krank, j
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, do_test, do_print
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVE A LINEAR LEAST SQUARES SYSTEM WITH ONE RIGHT HAND-SIDE
!               BY USING THE ONE_SIDED RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM AND
!               THE SINGULAR VALUE DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE
!   AND IF DETAILED RESULTS MUST BE PRINTED.
!
    do_test  = true
    do_print = false
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), sing_values(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n REAL RANDOM COEFFICIENT MATRIX a .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK n-1 .
!
    j = min( n, 5_i4b ) 
    a(:m,j) = a(:m,1_i4b) + a(:m,2_i4b)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b .
!
    call random_number( b )
!
!   COMPUTE THE NORM OF DEPENDENT VARIABLE b .
!
    bnorm = norm( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), b2(m), res(m), res2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
!       SAVE RIGHT HAND SIDE VECTOR.
!
        b2(:m) = b(:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   llsq_svd_solve2 COMPUTES THE MINIMUM NORM SOLUTION TO A REAL LINEAR LEAST
!   SQUARES PROBLEM :
!
!                       Minimize || b - a*x ||_2
!
!   USING THE SINGULAR VALUE DECOMPOSITION (SVD) OF a. A IS AN m-BY-n MATRIX
!   WITH m>=n, WHICH MAY BE RANK-DEFICIENT. b AND x CAN BE VECTORS OF MATRICES,
!   BUT THEIR SHAPES MUST BE CONFORMABLE WITH THE SHAPE OF a.
!
!   IN OTHER WORDS, IF b AND x ARE MATRICES, SEVERAL RIGHT HAND SIDE VECTORS b
!   AND SOLUTION VECTORS x CAN BE HANDLED IN A SINGLE CALL; THEY ARE STORED AS
!   THE COLUMNS OF THE m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX b, RESPECTIVELY.
!
!   THE EFFECTIVE RANK OF a, krank,IS DETERMINED BY TREATING AS ZERO THOSE
!   SINGULAR VALUES WHICH ARE LESS THAN tol TIMES THE LARGEST SINGULAR VALUE.
!
    call llsq_svd_solve2( a, b, failure, x,                                         &
                          singvalues=sing_values, krank=krank, rnorm=rnorm, tol=tol )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUAL VECTOR IS ORTHOGONAL TO THE RANGE OF a .
!
        res(:m)  = b2(:m) - matmul( a2(:m,:n), x(:n) )
        res2(:n) = matmul( res(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res2(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( res(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, b2, res, res2 )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!   GET MACHINE CONSTANT sfmin, SUCH THAT 1/sfmin DOES NOT OVERFLOW.
!
    sfmin = lamch( 's' )
!
!   COMPUTE THE CONDITION NUMBER OF a(:m,:n) IN THE 2-NORM
!
!            singvalues(1)/singvalues(n) .
!
    if ( sing_values(n)/sing_values(1_i4b)<=sfmin ) then
        cond = huge( cond )
    else
        cond = sing_values(1_i4b)/sing_values(n)
    end if
!
!   PRINT RESULTS .
!
    write (prtunit,*)
    write (prtunit,*)
    write (prtunit,*) 'Least squares solution via Singular Value Decomposition'
    write (prtunit,*)
    write (prtunit,*) '    min of ||a(:,:)*x(:)-b(:)||**2 for vector x(:)     '
    write (prtunit,*)
    write (prtunit,*) 'Tolerance for zero singular values (tol*sing_values(1)):',tol*sing_values(1)
    write (prtunit,*)
    write (prtunit,*) 'Condition number (in the 2-norm) of a :',cond
    write (prtunit,*) 'Rank of a                             :',krank
    write (prtunit,*)
    write (prtunit,*) 'Residual sum of squares     ||a*x-b||**2          :',rnorm**2
    write (prtunit,*) 'Residual sum of squares (%) ||a*x-b||**2/||b||**2 :',(rnorm/bnorm)**2
    write (prtunit,*)
!
    if ( do_print ) then
!
!       PRINT DETAILED RESULTS.
!
        call print_array( sing_values, title=' Singular values of a ' )
!
        write (prtunit,*)
!
        call print_array( x, title=' Least squares solution vector x ' )
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, sing_values )
!
!
! END OF PROGRAM ex1_llsq_svd_solve2
! ==================================
!
end program ex1_llsq_svd_solve2

ex1_lp_coef.F90

program ex1_lp_coef
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions LP_COEF
!   and SYMLIN_FILTER in module Time_Series_Procedures for low-pass filtering a time
!   series with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, half, one, two, pi, true, false, merror, allocate_error,   &
                         arth, lp_coef, symlin_filter, init_fft, fft, end_fft
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=2000
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, tmp, fcl
    real(stnd), dimension(n)              :: y, y2, y3, freq, freqr, tmpvec
    real(stnd), dimension(:), allocatable :: coef
!
    complex(stnd), dimension(n)           :: yc
!
    integer(i4b) :: i, k, k1, k2, pl, nfilt, khalf, kmid
!   
    integer :: iok
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of lp_coef'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
!
!   pl IS THE MINIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   pl IS EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pl==32(96) SELECT
!   A PERIOD OF 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pl  = 32
!
!   COMPUTE THE CORRESPONDING CUTOFF FREQUENCY fcl .
!
    fcl = one/real( pl, stnd )
!
!   NOW SELECT THE OPTIMAL NUMBER OF FILTER COEFFICIENTS k FOR THE LANCZOS FILTER.
!
    i = ceiling( one/(half-fcl) )
    k = max( i, pl+1 )
!
!   CHECK IF k IS ODD.
!
    if ( (k/2)*2==k  ) k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION lp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- LOW-PASS FILTER WITH CUTOFF PERIODS PL (EG CUTOFF FREQUENCY 1/PL).
!
    coef(:k) = lp_coef( pl=pl, k=k )
!
!   SUBROUTINE symlin_filter PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter FILTERS THE TIME SERIES .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE LOST FROM EACH END OF THE SERIES SO THAT
!   NFILT (NFILT= size(VEC) - size(COEF) + 1) TIME OBSERVATIONS ARE RETURNED IN VEC(:NFILT)
!   AND THE REMAINING OBSERVATIONS ARE SET TO ZERO.
!
    call symlin_filter( vec=y2(:n), coef=coef(:k),  nfilt=nfilt )
!
!   NOW COMPUTE THE TRANSFERT FUNCTION freqr(:) OF THE LANCZOS FILTER AT THE FOURIER FREQUENCIES.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    freqr(:n) = coef(kmid)
!
    tmp        = (two*pi)/real( n, stnd )
    freq(:n)   = arth( zero, tmp, n )
    tmpvec(:n) = zero
!
    do i = 1, khalf
        tmpvec(:n) = tmpvec(:n) + freq(:n)
        freqr(:n)  = freqr(:n)  + two*coef(kmid+i)*cos( tmpvec(:n) )
    end do
!
!   NOW, APPLY THE FILTER USING THE FFT AND THE CONVOLUTION THEOREM.
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   TRANSFORM THE TIME SERIES.
!
    yc(:n) = cmplx( y(1:n), zero, kind=stnd )
!
    call fft( yc(:n), forward=true  )
!
!   MULTIPLY THE FOURIER TRANSFORM OF THE TIME SERIES
!   BY THE TRANSFERT FUNCTION OF THE FILTER.
!
    yc(:n) = yc(:n)*freqr(:n)
!
!   INVERT THE SEQUENCE BACK TO GET THE FILTERED TIME SERIES.
!
    call fft( yc(:n), forward=false )
!
    y3(:n) = real( yc(:n),  kind=stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    call end_fft()
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(:nfilt)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_lp_coef
! ==========================
!
end program ex1_lp_coef

ex1_lp_coef2.F90

program ex1_lp_coef2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions LP_COEF2
!   and SYMLIN_FILTER2 in module Time_Series_Procedures for low-pass filtering a time
!   series with a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, merror, allocate_error,   &
                         lp_coef2, symlin_filter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES .
!
    integer(i4b), parameter :: prtunit=6, n=2001
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err
    real(stnd), dimension(n)              :: y, y2, y3
    real(stnd), dimension(:), allocatable :: coef
!
    integer(i4b) :: k, k1, k2, pl, khalf, kmid
!   
    integer :: iok
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of lp_coef2'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
!   pl IS THE MINIMUM PERIOD OF OSCILLATION OF DESIRED COMPONENT.
!   pl IS EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pc==32(96) SELECT
!   A PERIOD OF 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pl  = 32
!
!   NOW SELECT THE NUMBER OF FILTER COEFFICIENTS k FOR THE HAMMING FILTER.
!
    k = pl + 1
    if ( (k/2)*2==k )  k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION lp_coef2 COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- LOW-PASS FILTER WITH CUTOFF PERIODS PL (EG CUTOFF FREQUENCY 1/PL).
!
    coef(:k) = lp_coef2( pl=pl, k=k )
!
!   SUBROUTINE symlin_filter2 PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter2 FILTERS THE TIME SERIES .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE AFFECTED BY END EFFECTS  FROM EACH END OF THE SERIES.
!
    call symlin_filter2( vec=y2(:n), coef=coef(:k) )
!
!   FILTER AGAIN THE TIME SERIES, BUT COMPUTE THE VALUES AT THE ENDS OF THE OUTPUT
!   BY ASSUMING THAT THE INPUT IS PART OF A PERIODIC SEQUENCE OF PERIOD n .
!
    call symlin_filter2( vec=y3(:n), coef=coef(:k), usefft=true )
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
!
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(k1:k2)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_lp_coef2
! ===========================
!
end program ex1_lp_coef2

ex1_lq_cmp.F90

program ex1_lq_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines LQ_CMP and ORTHO_GEN_LQ
!   in module QR_Procedures for computing the LQ decomposition of a real matrix.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, one, c50, true, false, lq_cmp, ortho_gen_lq, norm, &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX,
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=4000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of lq_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, ulp, elapsed_time
    real(stnd), dimension(:),   allocatable :: diagl, tau, resid2, norma
    real(stnd), dimension(:,:), allocatable :: a, q, l, resid
!
    integer(i4b) :: k, j, p
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTING A FULL LQ DECOMPOSITION OF A MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    k = min( m, n )
    p = max( m, n )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), diagl(k), tau(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n).
!
    call random_number( a(:m,:n) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( l(m,k), q(p,n), resid(p,n), resid2(m), norma(m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE LQ DECOMPOSITION OF RANDOM DATA MATRIX.
!
    call lq_cmp( a(:m,:n), diagl(:k), tau(:k) )
!
!   lq_cmp COMPUTES AN ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX
!   BY THE HOUSEOLDER METHOD. THE MATRIX IS ASSUMED OF FULL RANK. THE ROUTINE
!   COMPUTES A LQ FACTORIZATION OF a AS:
!
!                     a = L * Q
!
!   Q IS A n-BY-n ORTHOGONAL MATRIX AND L IS A m-BY-n LOWER TRIANGULAR OR
!   TRAPEZOIDAL MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS ORTHOGONAL
!   FACTORIZATION. 
!
!   THE MATRIX Q IS REPRESENTED AS A PRODUCT OF ELEMENTARY REFLECTORS
!
!            q = h(k)*h(k-1)* ... *h(1), WHERE k = min( size(a,1) , size(a,2) )
!
!   EACH h(i) HAS THE FORM
!
!            h(i) = I + TAU * ( V * V' ) ,
!                      
!   WHERE TAU IS A REAL SCALAR AND V IS A REAL n-ELEMENT VECTOR WITH V(1:i-1) = 0.
!   V(i:n) IS STORED ON EXIT IN a(i,i:n) AND TAU IN tau(i).
!                      
!   THE ELEMENTS BELOW THE DIAGONAL OF THE ARRAY a CONTAIN THE
!   CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX L. THE ELEMENTS
!   OF THE DIAGONAL OF L ARE STORED IN THE ARRAY diagl ON EXIT. 
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       NOW, RESTORE TRIANGULAR FACTOR L OF LQ DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX l(:m,:k).
!
        do j = 1, k
!
            l(1:j-1,j) = zero
            l(j,j)     = diagl(j)
            l(j+1:m,j) = a(j+1:m,j)
!
        end do
!
        q(:k,:n) = a(:k,:n)
!
!       GENERATE ORTHOGONAL MATRIX Q OF LQ DECOMPOSITION OF RANDOM DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a'.
!       a IS ASSUMED OF FULL RANK.
!
        call ortho_gen_lq( q(:n,:n), tau(:k) )
!
!       ortho_gen_lq GENERATES AN n-BY-n REAL MATRIX WITH ORTHONORMAL ROWS, WHICH IS
!       DEFINED AS THE PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER n
!
!            q = h(k)*h(k-1)* ... *h(1)
!
!       AS RETURNED BY lq_cmp.
!
!       THE SIZE OF tau DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW, THE ROWS OF q(:k,:n) ARE AN ORTHOGONAL BASIS FOR THE RANGE OF a'
!       AND THE ROWS OF q(k+1:n,:n) ARE AN ORTHOGONAL BASIS FOR THE ORTHOGONAL
!       COMPLEMENT TO THE RANGE OF a'.
!
!       RESTORE THE INPUT MATRIX IN a(:m,:n) .
!
        a(:m,:n) = resid(:m,:n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n) - l(:m,:k)*q(:k,:n).
!
        resid(:m,:n) = a(:m,:n) - matmul( l(:m,:k), q(:k,:n) )
        resid2(:m)   = norm( resid(:m,:n), dim=1_i4b )
        norma(:m)    = norm( a(:m,:n), dim=1_i4b )
!
        err1 = maxval( resid2(:m) / norma(:m) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q*Q'.
!
        resid(:n,:n) = matmul( q(:n,:n), transpose(q(:n,:n)) )
!
        do j = 1, n
            resid(j,j) =  resid(j,j) - one
        end do
!
        err2 = maxval( abs(resid(:n,:n)) )/real(n,stnd)
!
!       CHECK ORTHOGONALITY OF a(:m,:n) AND ITS ORTHOGONAL COMPLEMENT q(m+1:n,:n).
!
        if ( m<n ) then
!
            resid(:m,m+1_i4b:n) = matmul( a(:m,:n), transpose(q(m+1_i4b:n,:n) ) )
            err3 = maxval( abs( resid(:m,m+1_i4b:n) ) )/real(n,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( l, q, resid, resid2, norma )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagl, tau )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the LQ decomposition     &
                          &                                      = ', err1
        write (prtunit,*) 'Orthogonality of the Q matrix        &
                          &                                      = ', err2
!
        if ( m<n ) then
            write (prtunit,*) 'Orthogonality of the row-space of the matrix&
                              & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a LQ decomposition of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_lq_cmp
! =========================
!
end program ex1_lq_cmp

ex1_lu_cmp.F90

program ex1_lu_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines LU_CMP and LU_SOLVE
!   in module Lin_Procedures for computing a LU decomposition of a real square matrix
!   and solving a linear system with such matrix as a coefficient matrix.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, half, safmin, true, false, lu_cmp, lu_solve, &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=1000
!
    character(len=*), parameter :: name_proc='Example 1 of lu_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:),  allocatable  :: b, x, res
!
    integer(i4b), dimension(:), allocatable :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR THE LINEAR SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a. IF ON OUTPUT OF lu_cmp
!   d1 IS DIFFERENT FROM ZERO THEN THE LINEAR SYSTEM IS NOT
!   SINGULAR AND CAN BE SOLVED BY SUBROUTINE lu_solve.
!
    call lu_cmp( a, ip, d1 )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT FROM lu_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of LU_CMP subroutine, d1=', d1
        write (prtunit,*)
!
    else
!
!       SOLVE THE LINEAR SYSTEM.
!
        call lu_solve( a, ip, b )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1/=zero .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = b(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, ip, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, ip )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_lu_cmp
! =========================
!
end program ex1_lu_cmp

ex1_lu_cmp2.F90

program ex1_lu_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LU_CMP2
!   in module Lin_Procedures for computing a LU decomposition of a real
!   square matrix and the inverse of such matrix.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, one, true, false,   &
                         lu_cmp2, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SQUARE MATRIX
!
    integer(i4b), parameter :: prtunit=6, n=4000
!
    character(len=*), parameter :: name_proc='Example 1 of lu_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, res
!
    integer(i4b)                            :: j
    integer(i4b), dimension(:), allocatable :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE INVERSE OF A REAL n-BY-n MATRIX
!               BY USING THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), ainv(n,n), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE A COPY OF THE MATRIX a.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE INVERSE OF A SQUARE REAL MATRIX BY COMPUTING
!   THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a. IF ON OUTPUT OF lu_cmp2
!   d1 IS DIFFERENT FROM ZERO THEN THE MATRIX IS NOT SINGULAR
!   AND THE INVERSE OF a HAS BEEN COMPUTED.
!
    call lu_cmp2( a, ip, d1, matinv=ainv )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT FROM lu_cmp2 SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of LU_CMP2 subroutine, d1=', d1
        write (prtunit,*)
!
    else if ( do_test ) then
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( a2, ainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a2(:n,:n)) )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        deallocate( a, ainv, ip, a2, res )
    else
        deallocate( a, ainv, ip )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a  real matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_lu_cmp2
! ==========================
!
end program ex1_lu_cmp2

ex1_matmul2.F90

program ex1_matmul2
!
!
! Purpose
! =======
!
!   This program illustrates the use of function MATMUL2 in module Module_Utilities
!   and compares its efficiency with the intrinsic MATMUL function for the multiplcation
!   of two real matrices.
!
!   If OpenMP or an optimized BLAS library are used, MATMUL2 function must be much
!   faster than the intrinsic MATMUL function for many compilers.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, norm, matmul2, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n, p AND m ARE THE DIMENSIONS OF THE TWO GENERATED MATRICES.
!
    integer(i4b), parameter :: prtunit=6, n=10000, p=5000, m=10000
!
    character(len=*), parameter :: name_proc='Example 1 of matmul2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, eps, elapsed_time1, elapsed_time2
    real(stnd), dimension(:,:), allocatable :: a, b, c, c2, resid
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : MULTIPLICATION OF TWO REAL MATRICES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,p), b(p,m), c(n,m), c2(n,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE THE RANDOM REAL MATRICES a AND b.
!
    call random_number( a(:n,:p) )
    call random_number( b(:p,:m) )
!
!   MULTIPLY THE TWO MATRICES WITH matmul2 FUNCTION.
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
    c2(:n,:m) = matmul2( a(:n,:p), b(:p,:m) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time1 = real( itime, stnd )/real( irate, stnd )
!
!   NOW RECOMPUTE THE MATRIX PRODUCT WITH matmul INTRINSIC FUNCTION.
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
    c(:n,:m) = matmul( a(:n,:p), b(:p,:m) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time2 = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(n,m), stat=iok )
!
        if ( iok/=0 ) then
           call merror( name_proc//allocate_error )
        end if
!
        resid(:n,:m) = abs( c2(:n,:m) - c(:n,:m) )
!
!       CHECK THE RESULTS.
!
        err = maxval( resid(:n,:m) )/norm( c )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, c2, resid )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, c2 )
!
    end if
!
!   CHECK AND PRINT THE RESULTS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i5,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for multiplying two real matrices of sizes ', n, ' by ', p, ' and ', p, ' by ', m,  &
       ' with matmul2() function is ', elapsed_time1, ' seconds'
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i5,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for multiplying two real matrices of sizes ', n, ' by ', p, ' and ', p, ' by ', m,  &
       ' with the intrinsic matmul() function is ', elapsed_time2, ' seconds'
!
!
! END OF PROGRAM ex1_matmul2
! ==========================
!
end program ex1_matmul2

ex1_normal_random_number2_.F90

program ex1_normal_random_number2_
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine NORMAL_RANDOM_NUMBER2_ and
!   function NORMAL_RAND_NUMBER2 in module Random for generating arrays of real random numbers
!   (of kind EXTD) following a Gaussian distribution by the Cumulative Density Function (CDF)
!   inversion method.
!
!
! LATEST REVISION : 01/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, extd,  merror, allocate_error, random_seed_,  &
                         normal_rand_number2, normal_random_number2_
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n1 AND n2 ARE THE DIMENSIONS OF THE GENERATED ARRAY.
!
    integer(i4b), parameter :: prtunit=6, n1=10**4, n2=10**4
!
    character(len=*), parameter :: name_proc='Example 1 of normal_random_number2_'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time
    real(extd), allocatable, dimension(:,:) :: real_mat
!
    integer(i4b) :: i, j
    integer      :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   ALLOCATE WORK ARRAY.
!
    allocate( real_mat(n1,n2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!   alg CAN BE CHOOSEN BETWEEN 1 AND 10.
!
    call random_seed_( alg=2 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
    call system_clock( count_rate=irate, count_max=imax  )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING FUNCTION normal_rand_number3().
!
    do i = 1_i4b, n2
!
        do j = 1_i4b, n1
!
             real_mat(j,i) = normal_rand_number2( )
!
        end do
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with function normal_rand_number2() is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, n2
!
        call normal_random_number2_( real_mat(:n1,i) )
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with vector form of subroutine normal_random_number2_ is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number2_.
!
    call normal_random_number2_( real_mat(:n1,:n2) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with matrix form of subroutine normal_random_number2_ is',  &
      elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( real_mat )
!
!
! END OF PROGRAM ex1_normal_random_number2_
! =========================================
!
end program ex1_normal_random_number2_

ex1_normal_random_number3_.F90

program ex1_normal_random_number3_
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine NORMAL_RANDOM_NUMBER3_ and
!   function NORMAL_RAND_NUMBER3 in module Random for generating arrays of real random numbers
!   following a Gaussian distribution by the classical Box-Muller method.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd,  merror, allocate_error, random_seed_,  &
                         normal_rand_number3, normal_random_number3_
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!
!
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n1 AND n2 ARE THE DIMENSIONS OF THE GENERATED ARRAY.
!
    integer(i4b), parameter :: prtunit=6, n1=10**4, n2=10**4
!
    character(len=*), parameter :: name_proc='Example 1 of normal_random_number3_'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time
    real(stnd), allocatable, dimension(:,:) :: real_mat
!
    integer(i4b) :: i, j
    integer      :: iok, istart, iend, irate, imax, itime
!
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   ALLOCATE WORK ARRAY.
!
    allocate( real_mat(n1,n2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!   alg CAN BE CHOOSEN BETWEEN 1 AND 10.
!
    call random_seed_( alg=2 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
    call system_clock( count_rate=irate, count_max=imax  )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING FUNCTION normal_rand_number3().
!
    do i = 1_i4b, n2
!
        do j = 1_i4b, n1
!
             real_mat(j,i) = normal_rand_number3( )
!
        end do
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with function normal_rand_number3() is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, n2
!
        call normal_random_number3_( real_mat(:n1,i) )
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with vector form of subroutine normal_random_number3_ is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
    call normal_random_number3_( real_mat(:n1,:n2) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with matrix form of subroutine normal_random_number3_ is',  &
      elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( real_mat )
!
!
! END OF PROGRAM ex1_normal_random_number3_
! =========================================
!
end program ex1_normal_random_number3_

ex1_normal_random_number_.F90

program ex1_normal_random_number_
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine NORMAL_RANDOM_NUMBER_ and
!   function NORMAL_RAND_NUMBER in module Random for generating arrays of real random numbers
!   following a Gaussian distribution by the Cumulative Density Function (CDF) inversion method.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd,  merror, allocate_error, random_seed_,  &
                         normal_rand_number, normal_random_number_
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n1 AND n2 ARE THE DIMENSIONS OF THE GENERATED ARRAY.
!
    integer(i4b), parameter :: prtunit=6, n1=10**4, n2=10**4
!
    character(len=*), parameter :: name_proc='Example 1 of normal_random_number_'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time
    real(stnd), allocatable, dimension(:,:) :: real_mat
!
    integer(i4b) :: i, j
    integer      :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   ALLOCATE WORK ARRAY.
!
    allocate( real_mat(n1,n2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!   alg CAN BE CHOOSEN BETWEEN 1 AND 10.
!
    call random_seed_( alg=2 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
    call system_clock( count_rate=irate, count_max=imax  )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING FUNCTION normal_rand_number().
!
    do i = 1_i4b, n2
!
        do j = 1_i4b, n1
!
             real_mat(j,i) = normal_rand_number( )
!
        end do
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with function normal_rand_number() is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, n2
!
        call normal_random_number_( real_mat(:n1,i) )
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with vector form of subroutine normal_random_number_ is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number_.
!
    call normal_random_number_( real_mat(:n1,:n2) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random normal real numbers with matrix form of subroutine normal_random_number_ is',  &
      elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( real_mat )
!
!
! END OF PROGRAM ex1_normal_random_number_
! ========================================
!
end program ex1_normal_random_number_

ex1_ortho_gen_q_bd.F90

program ortho_gen_q_bd
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines BD_CMP, ORTHO_GEN_Q_BD
!   and ORTHO_GEN_P_BD in module SVD_Procedures for reducing to bidiagonal form a real matrix.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, bd_cmp,         &
                         ortho_gen_q_bd, ortho_gen_p_bd, norm, unit_matrix,      &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, nm=min(n,m)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of ortho_gen_q_bd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, tauq, taup
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, bd, p
!
    integer(i4b) :: l
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BIDIAGONAL REDUCTION OF A m-by-n REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), p(n,nm), d(nm), e(nm),     &
              tauq(nm), taup(nm), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n REAL RANDOM DATA MATRIX .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), bd(nm,nm), resid(nm,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   FIRST, CALL bd_cmp TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!                      a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER OR LOWER BIDIAGONAL MATRIX.
!
    call bd_cmp( a, d, e, tauq, taup )
!
!   ON OUTPUT OF bd_cmp:
!
!       a, tauq AND taup CONTAINS THE ELEMENTARY REFLECTORS
!       DEFINING Q AND P IN PACKED FORM.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!   SECOND, CALL ortho_gen_p_bd AND ortho_gen_q_bd TO GENERATE P AND Q.
!
    call ortho_gen_p_bd( a, taup, p )
!
!   ON OUTPUT OF ortho_gen_p_bd, p CONTAINS THE ORTHOGONAL MATRIX P.
!
    call ortho_gen_q_bd( a, tauq )
!
!   ON OUTPUT OF ortho_gen_q_bd, a CONTAINS THE FIRST min(n,m) COLUMNS OF Q.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION Q**(t)*a - BD*P**(t),
!
        bd(:nm,:nm) = zero
!
        if ( m>=n ) then
!
!           BD IS UPPER BIDIAGONAL.
!
            do l = 1_i4b, nm-1_i4b
                bd(l,l)       = d(l)
                bd(l,l+1_i4b) = e(l+1_i4b)
            end do
!
            bd(nm,nm) = d(nm)
!
        else
!
!           BD IS LOWER BIDIAGONAL.
!
            bd(1_i4b,1_i4b) = d(1_i4b)
!
            do l = 2_i4b, nm
                bd(l,l-1_i4b) = e(l)
                bd(l,l)       = d(l)
            end do
!
        endif
!
        resid(:nm,:n) = matmul( transpose(a(:m,:nm)), a2(:m,:n) )           &
                        - matmul( bd(:nm,:nm), transpose(p(:n,:nm )) )
!
        bd(:nm,1_i4b) = norm( resid(:nm,:n), dim=1_i4b )
        err1 =  maxval( bd(:nm,1_i4b) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( a2(:nm,:nm) )
!
        resid(:nm,:nm) = abs( a2(:nm,:nm) - matmul( transpose(a(:m,:nm )), a(:m,:nm ) ) )
        err2 = maxval( resid(:nm,:nm) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - P**(t)*P.
!
        resid(:nm,:nm) = abs( a2(:nm,:nm) - matmul( transpose(p(:n,:nm )), p(:n,:nm ) ) )
        err3 = maxval( resid(:nm,:nm) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, bd, resid )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, d, e, tauq, taup )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed bidiagonal decomposition a = Q*BD*P**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed Q orthogonal matrix                 = ', err2
        write (prtunit,*) 'Orthogonality of the computed P orthogonal matrix                 = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the bidiagonal reduction of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_ortho_gen_q_bd
! =================================
!
end program ex1_ortho_gen_q_bd

ex1_partial_qr_cmp.F90

program ex1_partial_qr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_QR_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures.
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, partial_qr_cmp,  &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat, random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! nqr IS THE TARGET RANK OF THE PARTIAL QR APPROXIMATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nqr=10
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of partial_qr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, tmp, norma, normr, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, q, r, resid
!
    integer(i4b)                            :: i, krank, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 4_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE EXACT RANK OF THE PARTIAL QR APPROXIMATION.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0),  &
                  q(m,m), r(nqr,n), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
    if ( do_test ) then
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING OF A DATA MATRIX a 
!   WITH SUBROUTINE partial_qr_cmp. THE RANK OF THE PARTIAL QR DECOMPOSITION IS
!   DETERMINED BY THE SIZE OF THE ARGUMENT diagr, nqr = size(diagr) .
!
    call partial_qr_cmp( a(:m,:n), diagr(:nqr), beta(:nqr), ip(:n), krank, tol=tol )
!
!   THE ROUTINE COMPUTES A PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE Q IS A m-BY-nqr MATRIX WITH ORTHOGONAL COLUMNS, R IS A nqr-BY-n UPPER OR TRAPEZOIDAL
!   MATRIX AND R11 IS A nqr-BY-nqr UPPER TRIANGULAR MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nqr) AND THE ARRAY
!     beta(:nqr) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!     OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF THE QR APPROXIMATION (e.g. THE RANK OF R11),
!     WHICH IS EQUAL TO nqr IF THE RANK OF a IS GREATER THAN nqr .
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R11 IS ASSUMED TO BE EQUAL TO nqr.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE EXCLUDED FROM THE QR APPROXIMATION BY USING tol=RELATIVE PRECISION
!   OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE FROBENIUS NORM OF THE RESIDUAL MATRIX a(:m,:n)*P(:n,:n) - Q(:m,:krank)*R(:krank,:n).
!
    normr = norm( a(krank+1_i4b:m,krank+1_i4b:n) )
!
!   COMPUTE RELATIVE ERROR OF THE QR APPROXIMATION.
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
        q(:m,:krank) = a(:m,:krank)
!
        call ortho_gen_qr( q(:m,:m), beta(:krank) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF nid ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(krank)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       RESTORE THE UPPER TRIANGULAR MATRIX R FROM THE QR FACTORIZATION OF a .
!
        do i = 1_i4b, nqr
!
            r(:i-1_i4b,i)    = a(:i-1_i4b,i)
            r(i,i)           = diagr(i) 
            r(i+1_i4b:nqr,i) = zero
!
        end do
!
        do i = nqr+1_i4b, n
!
            r(:nqr,i) = a(:nqr,i)
!
        end do
!
!       APPLY PERMUTATION P TO a .
!
        do i = 1_i4b, n
!
            a(:m,i) = resid(:m,ip(i))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION R(:krank,:n) - Q(:m,:krank)'*a*P .
!
        resid(:krank,:n) = abs( r(:krank,:n) - matmul( transpose(q(:m,:krank)), a(:m,:n) ) )
        err2 = maxval( resid(:krank,:n) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a(:m,:krank)), q(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( q, r, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip, singval0 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QR approximation     &
                       &                                        = ', krank
!        
    write (prtunit,*) 'Accuracy of the partial QR decomposition &
                       &||A - Q*R||_F/||A||_F                   = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the partial QR &
                          &approximation                            = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the QR approximation&
                               & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a partial QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_qr_cmp
! =================================
!
end program ex1_partial_qr_cmp

ex1_partial_qr_cmp_fixed_precision.F90

program ex1_partial_qr_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a deterministic partial QR factorization with column
!   pivoting of a matrix, which fullfills a given relative error in Frobenius norm using subroutine
!   PARTIAL_QR_CMP_FIXED_PRECISION in module QR_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 18/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, three, seven, c30, c50, allocate_error, &
                         merror, norm, unit_matrix, random_seed_, singval_sort, gen_random_mat,               &
                         ortho_gen_qr, partial_qr_cmp_fixed_precision
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! relerr0 IS THE REQUESTED TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QR FACTORIZATION
! WITH COLUMN PIVOTING IN FROBENIUS NORM.
!
    integer(i4b), parameter :: prtunit=6, m=20000, n=10000, mn=min(m,n), nsvd0=500
!   
    real(stnd), parameter  :: fudge=c50, relerr0=0.2_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of partial_qr_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: ulp, err, err2, eps, elapsed_time, norma, tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, a2, id, b
    real(stnd), dimension(:),   allocatable :: singval0, diagr, beta
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nqr, i, j, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
!
    logical(lgl) :: do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING OF RANK nqr OF A m-BY-n REAL MATRIX
!               USING A DETERMINISTIC ALGORITHM AS
!
!                              a(:m,:n) ≈ q(:m,:nqr)*b(:nqr,:n)
!
!               WHERE q IS A m-BY-nqr MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqr-BY-n MATRIX AND
!               THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!               NORM. The RANK nqr IS DETERMINED SUCH THAT THE ASSOCIATED QR APPROXIMATION FULLFILLS
!               A PRESCRIBED RELATIVE ERROR IN THE FROBENIUS NORM AND IS NOT KNOWN IN ADVANCE.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ORTHOGONALITY OF THE q MATRIX
!   AND THE QUALITY OF THE APPROXIMATION.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        allocate( a(m,n), a2(m,n), ip(n), singval0(nsvd0), diagr(mn), beta(mn), stat=iok )
    else
        allocate( a(m,n), ip(n), singval0(nsvd0), diagr(mn), beta(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    norma = norm( singval0(:nsvd0) )
!
    if ( do_test ) then
!
!       SAVE THE MATRIX FOR LATER USE IF REQUIRED.
!
        a2 = a
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   partial_qr_cmp_fixed_precision COMPUTES AN APPROXIMATE PARTIAL QR DECOMPOSITION (WITH COLUMN PIVOTING) OF A REAL
!   m-BY-n MATRIX a, WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET TOLERANCE FOR ITS RELATIVE ERROR
!   IN THE FROBENIUS NORM:
!
!                                || A - Q*B ||_F <= ||A||_F * relerr
!
!   , WHERE Q*B IS THE COMPUTED PARTIAL QR APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL QR
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   HERE THE RANK, nqr, OF THE DETERMINISTIC PARTIAL QR DECOMPOSITION IS NOT KNOWN IN ADVANCE AND
!   IS DETERMINED IN THE SUBROUTINE.
!
!   FIRST, SET THE TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QR FACTORIZATION IN FROBENIUS NORM.
!
    relerr = relerr0
!
    call partial_qr_cmp_fixed_precision( a(:m,:n), relerr, diagr(:mn), beta(:mn), ip(:n), nqr )
!
!   THE ROUTINE RETURNS THE TWO FACTORS OF THE PARTIAL QR DECOMPOSITION, WHICH FULFILLS
!   THE PRESET TOLERANCE SPECIFIED IN ARGUMENT relerr, IN FACTORED FORM IN ARRAYS a, diagr, beta AND ip.
!
!   ON EXIT OF partial_qr_cmp_fixed_precision relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QR DECOMPOSITION.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE BEST RELATIVE ERROR FROM THE TRUNCATED SVD OF RANK nqr.
!
    relerr2 = norm( singval0(nqr+1_i4b:nsvd0)/norma )
!
!   TEST ACCURACY OF THE Q*B APPROXIMATION AND ORTHOGONALITY OF MATRIX Q IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( b(nqr,n), id(nqr,nqr), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       RESTORE PERMUTED TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX b(:nqr,:n) .
!
        do j = 1_i4b, nqr
!
            b(1_i4b:j-1_i4b,ip(j)) = a(1_i4b:j-1_i4b,j)
            b(j,ip(j))             = diagr(j)
            b(j+1_i4b:nqr,ip(j))   = zero
!
        end do
!
        do j = nqr+1_i4b, n
!
            b(1_i4b:nqr,ip(j)) = a(1_i4b:nqr,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF RANDOM DATA MATRIX a .
!
        call ortho_gen_qr( a(:m,:nqr), beta(:nqr) )
!
!       RECOMPUTE THE RELATIVE ERROR OF THE QR APPROXIMATION Q*B .
!
        a2(:m,:n) = a2(:m,:n) - matmul( a(:m,:nqr), b(:nqr,:n) )
!
!       CHECK ACCURACY OF THE RELATIVE ERROR COMPUTED BY partial_qr_cmp_fixed_precision.
!
        err = abs( norm( a2(:m,:n) )/norma - relerr )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqr)**(t)*q(:m,:nqr).
!
        call unit_matrix( id(:nqr,:nqr) )
!
        b(:nqr,:nqr) = abs( id(:nqr,:nqr) - matmul( transpose(a(:m,:nqr)), a(:m,:nqr) ) )
!
        err2 = maxval( b(:nqr,:nqr) )/real(m,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, b, a2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, singval0, ip )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( relerr<=relerr0 .and. err<=eps*norma ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Requested relative error in Frobenius norm                          = ', &
                      relerr0
    write (prtunit,*) 'Rank of the partial QR decomposition with column pivoting           = ', &
                      nqr
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-Q*B||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-Q*B||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Orthogonality of the computed orthonormal matrix Q = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a deterministic partial QR approximation of rank ', nqr, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_qr_cmp_fixed_precision
! =================================================
!
end program ex1_partial_qr_cmp_fixed_precision

ex1_partial_rqr_cmp.F90

program ex1_partial_rqr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_RQR_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures.
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, partial_rqr_cmp, &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat, random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! nqr IS THE TARGET RANK OF THE PARTIAL QR APPROXIMATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nqr=50
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of partial_rqr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, tmp, norma, normr, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, q, r, resid
!
    integer(i4b)                            :: i, krank, blk_size, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A RANDOMIZED PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE EXACT RANK OF THE PARTIAL QR APPROXIMATION.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   DETERMINE THE BLOCKSIZE PARAMETER IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    blk_size = 20_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0),  &
                  q(m,m), r(nqr,n), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
    if ( do_test ) then
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING OF A DATA MATRIX a
!   WITH SUBROUTINE partial_rqr_cmp. THE RANK OF THE PARTIAL QR DECOMPOSITION IS
!   DETERMINED BY THE SIZE OF THE ARGUMENT diagr, nqr = size(diagr) .
!
    call partial_rqr_cmp( a(:m,:n), diagr(:nqr), beta(:nqr), ip(:n), krank, tol=tol,  &
                          blk_size=blk_size, nover=nover )
!
!   THE ROUTINE COMPUTES A RANDOMIZED PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE Q IS A m-BY-nqr MATRIX WITH ORTHOGONAL COLUMNS, R IS A nqr-BY-n UPPER OR TRAPEZOIDAL
!   MATRIX AND R11 IS A nqr-BY-nqr UPPER TRIANGULAR MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nqr) AND THE ARRAY
!     beta(:nqr) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!     OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF THE QR APPROXIMATION (e.g. THE RANK OF R11),
!     WHICH IS EQUAL TO nqr IF THE RANK OF a IS GREATER THAN nqr .
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R11 IS ASSUMED TO BE EQUAL TO nqr.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE EXCLUDED FROM THE QR APPROXIMATION BY USING tol=RELATIVE PRECISION
!   OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE FROBENIUS NORM OF THE RESIDUAL MATRIX a(:m,:n)*P(:n,:n) - Q(:m,:krank)*R(:krank,:n).
!
    normr = norm( a(krank+1_i4b:m,krank+1_i4b:n) )
!
!   COMPUTE RELATIVE ERROR OF THE RANDOMIZED QR APPROXIMATION.
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
        q(:m,:krank) = a(:m,:krank)
!
        call ortho_gen_qr( q(:m,:m), beta(:krank) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF nid ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(krank)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       RESTORE THE UPPER TRIANGULAR MATRIX R FROM THE RANDOMIZED QR FACTORIZATION OF a .
!
        do i = 1_i4b, nqr
!
            r(:i-1_i4b,i)    = a(:i-1_i4b,i)
            r(i,i)           = diagr(i) 
            r(i+1_i4b:nqr,i) = zero
!
        end do
!
        do i = nqr+1_i4b, n
!
            r(:nqr,i) = a(:nqr,i)
!
        end do
!
!       APPLY PERMUTATION P TO a .
!
        do i = 1_i4b, n
!
            a(:m,i) = resid(:m,ip(i))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION R(:krank,:n) - Q(:m,:krank)'*a*P .
!
        resid(:krank,:n) = abs( r(:krank,:n) - matmul( transpose(q(:m,:krank)), a(:m,:n) ) )
        err2 = maxval( resid(:krank,:n) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a(:m,:krank)), q(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( q, r, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip, singval0 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QR approximation     &
                       &                                        = ', krank
!        
    write (prtunit,*) 'Accuracy of the partial QR decomposition &
                       &||A - Q*R||_F/||A||_F                   = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the partial QR &
                          &approximation                            = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the QR approximation&
                               & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_rqr_cmp
! ==================================
!
end program ex1_partial_rqr_cmp

ex1_partial_rqr_cmp2.F90

program ex1_partial_rqr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_RQR_CMP2 in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures.
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, partial_rqr_cmp2, &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat, random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! nqr IS THE TARGET RANK OF THE PARTIAL QR APPROXIMATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nqr=50
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of partial_rqr_cmp2'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, tmp, norma, normr, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, q, r, resid
!
    integer(i4b)                            :: i, krank, blk_size, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A RANDOMIZED PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE EXACT RANK OF THE PARTIAL QR APPROXIMATION.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   DETERMINE THE BLOCKSIZE PARAMETER IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    blk_size = 20_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0),  &
                  q(m,m), r(nqr,n), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
    if ( do_test ) then
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING OF A DATA MATRIX a
!   WITH SUBROUTINE partial_rqr_cmp2. THE RANK OF THE PARTIAL QR DECOMPOSITION IS
!   DETERMINED BY THE SIZE OF THE ARGUMENT diagr, nqr = size(diagr) .
!
    call partial_rqr_cmp2( a(:m,:n), diagr(:nqr), beta(:nqr), ip(:n), krank, tol=tol,  &
                           blk_size=blk_size, nover=nover )
!
!   THE ROUTINE COMPUTES A RANDOMIZED PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE Q IS A m-BY-nqr MATRIX WITH ORTHOGONAL COLUMNS, R IS A nqr-BY-n UPPER OR TRAPEZOIDAL
!   MATRIX AND R11 IS A nqr-BY-nqr UPPER TRIANGULAR MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nqr) AND THE ARRAY
!     beta(:nqr) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!     OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF THE QR APPROXIMATION (e.g. THE RANK OF R11),
!     WHICH IS EQUAL TO nqr IF THE RANK OF a IS GREATER THAN nqr .
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R11 IS ASSUMED TO BE EQUAL TO nqr.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE EXCLUDED FROM THE QR APPROXIMATION BY USING tol=RELATIVE PRECISION
!   OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE FROBENIUS NORM OF THE RESIDUAL MATRIX a(:m,:n)*P(:n,:n) - Q(:m,:krank)*R(:krank,:n).
!
    normr = norm( a(krank+1_i4b:m,krank+1_i4b:n) )
!
!   COMPUTE RELATIVE ERROR OF THE RANDOMIZED QR APPROXIMATION.
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
        q(:m,:krank) = a(:m,:krank)
!
        call ortho_gen_qr( q(:m,:m), beta(:krank) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF nid ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(krank)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       RESTORE THE UPPER TRIANGULAR MATRIX R FROM THE RANDOMIZED QR FACTORIZATION OF a .
!
        do i = 1_i4b, nqr
!
            r(:i-1_i4b,i)    = a(:i-1_i4b,i)
            r(i,i)           = diagr(i) 
            r(i+1_i4b:nqr,i) = zero
!
        end do
!
        do i = nqr+1_i4b, n
!
            r(:nqr,i) = a(:nqr,i)
!
        end do
!
!       APPLY PERMUTATION P TO a .
!
        do i = 1_i4b, n
!
            a(:m,i) = resid(:m,ip(i))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION R(:krank,:n) - Q(:m,:krank)'*a*P .
!
        resid(:krank,:n) = abs( r(:krank,:n) - matmul( transpose(q(:m,:krank)), a(:m,:n) ) )
        err2 = maxval( resid(:krank,:n) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a(:m,:krank)), q(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( q, r, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip, singval0 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QR approximation     &
                       &                                        = ', krank
!        
    write (prtunit,*) 'Accuracy of the partial QR decomposition &
                       &||A - Q*R||_F/||A||_F                   = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the partial QR &
                          &approximation                            = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the QR approximation&
                               & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_rqr_cmp2
! ===================================
!
end program ex1_partial_rqr_cmp2

ex1_partial_rqr_cmp_fixed_precision.F90

program ex1_partial_rqr_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a randomized partial QR factorization with column
!   pivoting of a matrix, which fullfills a given relative error in Frobenius norm,
!   using subroutine PARTIAL_RQR_CMP_FIXED_PRECISION in module Random.
!                                                                              
!                                                                              
! LATEST REVISION : 18/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, three, seven, c30, c50,   &
                         allocate_error, merror, norm, unit_matrix, random_seed_, singval_sort, &
                         gen_random_mat, ortho_gen_qr, partial_rqr_cmp_fixed_precision
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! relerr0 IS THE REQUESTED TOLERANCE FOR THE RELATIVE ERROR OF THE RANDOMIZED PARTIAL
! QR FACTORIZATION WITH COLUMN PIVOTING IN FROBENIUS NORM.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=1000, mn=min(m,n), nsvd0=500
!   
    real(stnd), parameter  :: fudge=c50, relerr0=0.2_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of partial_rqr_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: ulp, err, err2, eps, elapsed_time, norma, tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, a2, id, b
    real(stnd), dimension(:),   allocatable :: singval0, diagr, beta
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nqr, blk_size, nover, i, j, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
!
    logical(lgl) :: do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING OF RANK nqr OF A m-BY-n REAL MATRIX
!               USING A RANDOMIZED ALGORITHM AS
!
!                              a(:m,:n) ≈ q(:m,:nqr)*b(:nqr,:n)
!
!               WHERE q IS A m-BY-nqr MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqr-BY-n MATRIX AND
!               THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!               NORM. The RANK nqr IS DETERMINED SUCH THAT THE ASSOCIATED QR APPROXIMATION FULLFILLS
!               A PRESCRIBED RELATIVE ERROR IN THE FROBENIUS NORM AND IS NOT KNOWN IN ADVANCE.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED PARTIAL QR ALGORITHM.
!
!   THE RANK nqr OF THE COMPUTED PARTIAL QR FACTORIZATION WITH COLUMN PIVOTING
!   COMPUTED BY partial_rqr_cmp_fixed_precision IS ALWAYS A MULTIPLE OF blk_size.
!   THUS, CHOOSING blk_size INVOLVES TRADEOFFS BETWEEN SPEED AND A RANK AS SMALL
!   AS POSSIBLE FOR THE COMPUTED PARTIAL QR FACTORIZATION.
!
    blk_size = 20_i4b
!
!   CHOOSE THE OVERSAMPLING SIZE USED IN THE RANDOMIZED PARTIAL QR FACTORIZATION
!   WITH COLUMN PIVOTING.
!
    nover = 10_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ORTHOGONALITY OF THE q MATRIX
!   AND THE QUALITY OF THE APPROXIMATION.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        allocate( a(m,n), a2(m,n), ip(n), singval0(nsvd0), diagr(mn), beta(mn), stat=iok )
    else
        allocate( a(m,n), ip(n), singval0(nsvd0), diagr(mn), beta(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    norma = norm( singval0(:nsvd0) )
!
    if ( do_test ) then
!
!       SAVE THE MATRIX FOR LATER USE IF REQUIRED.
!
        a2 = a
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   partial_rqr_cmp_fixed_precision COMPUTES AN APPROXIMATE PARTIAL QR DECOMPOSITION (WITH COLUMN PIVOTING) OF A REAL
!   m-BY-n MATRIX a, WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET TOLERANCE FOR ITS RELATIVE ERROR
!   IN THE FROBENIUS NORM:
!
!                                || A - Q*B ||_F <= ||A||_F * relerr
!
!   , WHERE Q*B IS THE COMPUTED PARTIAL QR APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL QR
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   HERE THE RANK, nqr, OF THE RANDOMIZED PARTIAL QR DECOMPOSITION IS NOT KNOWN IN ADVANCE AND
!   IS DETERMINED IN THE SUBROUTINE.
!
!   FIRST, SET THE TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QR FACTORIZATION IN FROBENIUS NORM.
!
    relerr = relerr0
!
    call partial_rqr_cmp_fixed_precision( a(:m,:n), relerr, diagr(:mn), beta(:mn), ip(:n), nqr, &
                                          blk_size=blk_size, nover=nover )
!
!   THE ROUTINE RETURNS THE TWO FACTORS OF THE PARTIAL QR DECOMPOSITION, WHICH FULFILLS
!   THE PRESET TOLERANCE SPECIFIED IN ARGUMENT relerr, IN FACTORED FORM IN ARRAYS a, diagr, beta AND ip.
!
!   ON EXIT OF partial_rqr_cmp_fixed_precision relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QR DECOMPOSITION.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE BEST RELATIVE ERROR FROM THE TRUNCATED SVD OF RANK nqr.
!
    relerr2 = norm( singval0(nqr+1_i4b:nsvd0)/norma )
!
!   TEST ACCURACY OF THE Q*B APPROXIMATION AND ORTHOGONALITY OF MATRIX Q IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( b(nqr,n), id(nqr,nqr), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       RESTORE PERMUTED TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX b(:nqr,:n) .
!
        do j = 1_i4b, nqr
!
            b(1_i4b:j-1_i4b,ip(j)) = a(1_i4b:j-1_i4b,j)
            b(j,ip(j))             = diagr(j)
            b(j+1_i4b:nqr,ip(j))   = zero
!
        end do
!
        do j = nqr+1_i4b, n
!
            b(1_i4b:nqr,ip(j)) = a(1_i4b:nqr,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF RANDOM DATA MATRIX a .
!
        call ortho_gen_qr( a(:m,:nqr), beta(:nqr) )
!
!       RECOMPUTE THE RELATIVE ERROR OF THE QR APPROXIMATION Q*B .
!
        a2(:m,:n) = a2(:m,:n) - matmul( a(:m,:nqr), b(:nqr,:n) )
!
!       CHECK ACCURACY OF THE RELATIVE ERROR COMPUTED BY partial_rqr_cmp_fixed_precision.
!
        err = abs( norm( a2(:m,:n) )/norma - relerr )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqr)**(t)*q(:m,:nqr).
!
        call unit_matrix( id(:nqr,:nqr) )
!
        b(:nqr,:nqr) = abs( id(:nqr,:nqr) - matmul( transpose(a(:m,:nqr)), a(:m,:nqr) ) )
!
        err2 = maxval( b(:nqr,:nqr) )/real(m,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( id, b, a2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, singval0, ip )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( relerr<=relerr0 .and. err<=eps*norma ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Requested relative error in Frobenius norm                          = ', &
                      relerr0
    write (prtunit,*) 'Block size used in the randomized partial QR decomposition          = ', &
                      blk_size
    write (prtunit,*) 'Rank of the partial QR decomposition with column pivoting           = ', &
                      nqr
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-Q*B||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-Q*B||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Orthogonality of the computed orthonormal matrix Q = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QR approximation of rank ', nqr, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_rqr_cmp_fixed_precision
! ==================================================
!
end program ex1_partial_rqr_cmp_fixed_precision

ex1_partial_rtqr_cmp.F90

program ex1_partial_rtqr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_RTQR_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures.
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, partial_rtqr_cmp, &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat, random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! nqr IS THE TARGET RANK OF THE PARTIAL QR APPROXIMATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nqr=100
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of partial_rtqr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, tmp, norma, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, q, r, resid
!
    integer(i4b)                            :: i, j, krank, niter, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A RANDOMIZED PARTIAL AND TRUNCATED QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 4_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp  = epsilon( ulp )
    eps  = fudge*ulp
    err3 = zero
!
!   SET TOLERANCE FOR DETERMINING THE EXACT RANK OF THE PARTIAL QR APPROXIMATION.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS PERFORMED FOR IMPROVING THE QUALITY
!   OF THE COMPRESSION MATRIX USED IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    niter = 2_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER FOR IMPROVING THE QUALITY
!   OF THE COMPRESSION MATRIX USED IN THE RANDOMIZED PARTIAL QR ALGORITHM.
!
    nover = max( 10_i4b, nqr/2_i4b )
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0),  &
                  r(nqr,n), q(m,i), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nqr), beta(nqr), ip(n), singval0(nsvd0),  &
                  r(nqr,n), q(m,nqr), resid(m,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
    resid(:m,:n) = a(:m,:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED PARTIAL AND TRUNCATED QR DECOMPOSITION WITH COLUMN PIVOTING OF A DATA
!   MATRIX a WITH SUBROUTINE partial_rtqr_cmp. THE RANK OF THE PARTIAL QR DECOMPOSITION IS
!   DETERMINED BY THE SIZE OF THE ARGUMENT diagr, nqr = size(diagr) .
!
    call partial_rtqr_cmp( a(:m,:n), diagr(:nqr), beta(:nqr), ip(:n), krank, tol=tol,  &
                           niter=niter, nover=nover )
!
!   THE ROUTINE COMPUTES AN APPROXIMATE RANDOMIZED PARTIAL AND TRUNCATED QR FACTORIZATION
!   WITH COLUMN PIVOTING OF a AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE Q IS A m-BY-nqr MATRIX WITH ORTHOGONAL COLUMNS, R IS A nqr-BY-n UPPER OR TRAPEZOIDAL
!   MATRIX AND R11 IS A nqr-BY-nqr UPPER TRIANGULAR MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nqr) AND THE ARRAY
!     beta(:nqr) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!     OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF THE QR APPROXIMATION (e.g. THE RANK OF R11),
!     WHICH IS EQUAL TO nqr IF THE RANK OF a IS GREATER THAN nqr .
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R11 IS ASSUMED TO BE EQUAL TO nqr.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE EXCLUDED FROM THE QR APPROXIMATION BY USING tol=RELATIVE PRECISION
!   OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   NOW, RESTORE TRIANGULAR FACTOR R OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a
!   IN MATRIX r(:nqr,:n) .
!
    do j = 1_i4b, nqr
!
        r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
        r(j,j)             = diagr(j)
        r(j+1_i4b:nqr,j)   = zero
!
    end do
!
    do j = nqr+1_i4b, n
!
        r(1_i4b:nqr,j) = a(1_i4b:nqr,j)
!
    end do
!
!   GENERATE ORTHOGONAL MATRIX q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
    q(:m,:krank) = a(:m,:krank)
!                
    if ( do_test ) then
!
        i = m
!
    else
!
        i = krank
!
    end if
!
    call ortho_gen_qr( q(:m,:i), beta(:krank) )
!
!   HERE ortho_gen_qr GENERATES AN m-BY-l REAL MATRIX WITH ORTHONORMAL COLUMNS, WHICH IS
!   DEFINED AS THE FIRST l COLUMNS OF A PRODUCT OF krank ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(krank)
!
!   AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!   AND partial_rtqr_cmp SUBROUTINES.
!
!   THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!   DEFINES THE MATRIX Q.
!
!   APPLY PERMUTATION TO a .
!
    do j = 1_i4b, n
!
        a(:m,j) = resid(:m,ip(j))
!
    end do
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*P(:n,:n) - Q(:m,:krank)*R(:krank,:n).
!
    resid(:m,:n) = a(:m,:n) - matmul( q(:m,:krank), r(:krank,:n) )
!
!   COMPUTE RELATIVE ERROR OF THE QR APPROXIMATION.
!
    err1 = norm( resid(:m,:n) )/norma
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION R(:krank,:n) - Q(:m,:krank)'*a*P .
!
        resid(:krank,:n) = abs( r(:krank,:n) - matmul( transpose(q(:m,:krank)), a(:m,:n) ) )
        err2 = maxval( resid(:krank,:n) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a(:m,:krank)), q(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip, singval0, r, q, resid )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err3<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QR approximation     &
                       &                                        = ', krank
!        
    write (prtunit,*) 'Accuracy of the partial QR decomposition &
                       &||A - Q*R||_F/||A||_F                   = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the partial QR &
                          &approximation                            = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the QR approximation&
                               & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial and truncated QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_partial_rtqr_cmp
! ===================================
!
end program ex1_partial_rtqr_cmp

ex1_permute_cor.F90

program ex1_permute_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and PERMUTE_COR
!   in module Mul_Stat_Procedures for performing a permutation test on a correlation coefficient.
!
!
! LATEST REVISION : 12/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor, permute_cor, random_seed_, random_number_
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! p       IS THE NUMBER OF OBSERVATIONS OF THE RANDOM VECTORS;
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PERMUTATION TEST;
! nsample IS THE NUMBER OF SAMPLES TO EVALUATE THE REJECTION RATE OF THE TEST.
!
    integer(i4b), parameter :: prtunit=6, p=50, nrep=99, nsample=8000
!
! sign_level IS THE SIGNIFICANCE LEVEL OF THE PERMUTATION TEST;
! eps        IS THE ALLOWED RELATIVE ERROR FOR THE REJECTION RATE.
!
    real(stnd),  parameter :: sign_level=0.05, eps=0.1
!
    character(len=*), parameter :: name_proc='Example 1 of permute_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)               :: xyn, xycor, prob, err_prob
    real(stnd), dimension(2) :: xstat, ystat
    real(stnd), dimension(p) :: x, y
!
    integer(i4b) :: i, rej_rate
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   INITIALIZE THE RANDOM GENERATOR.
!
    call random_seed_()
!
!   INITIALIZE THE REJECTION RATE FOR THE PERMUTATION TEST.
!
    rej_rate = 0
!
    first    = true
    last     = true
!
!   GENERATE A RANDOM UNIFORM OBSERVATION VECTOR y .
!
    call random_number_( y(:p) )
!
    do i=1, nsample
!
!       GENERATE A RANDOM UNIFORM OBSERVATION VECTOR x .
!
        call random_number_( x(:p) )
!
!       COMPUTE THE CORRELATIONS BETWEEN x AND y
!       FOR THE p OBSERVATIONS.
!
        call comp_cor( x(:p), y(:p), first, last, xstat(:2), ystat(:2), xycor, xyn )
!
!       ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(1)     CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR x(:p).
!
!      xstat(2)     CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR x(:p).
!
!      ystat(1)     CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(:p).
!
!      ystat(2)     CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(:p).
!
!      xycor        CONTAINS THE CORRELATION COEFFICIENT BETWEEN x(:p) AND y(:p).
!
!      xyn          CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA VECTORS
!                   x(:p) AND  y(:p) (xyn=real(p,stnd) ).
!
!
!       NOW COMPUTE A PERMUTATION TEST OF THE CORRELATION BETWEEN x AND y WITH
!       SUBROUTINE permute_cor WITH nrep SHUFFLES.
!
        call permute_cor( x(:p), y(:p), xstat(:2), ystat(:2), xycor, prob, nrep=nrep )
!
!       EVALUATE THE REJECTION RATE.
!
        if ( prob<=sign_level ) rej_rate = rej_rate + 1
!
    end do
!
!   NOW COMPUTE THE REJECTION RATE OF THE TEST. IDEALLY FOR THE sign_level
!   SIGNIFICANCE LEVEL, WE WOULD REJECT ONLY THE NULL HYPOTHESIS sign_level %
!   OF THE TIME. IN OTHER WORDS, THE REJECTION RATE MUST BE APPROXIMATELY EQUAL
!   TO THE SIGNIFICANCE LEVEL sign_level .
!
    prob = real( rej_rate, stnd )/real( nsample, stnd )
!
!   COMPUTE THE RELATIVE ERROR OF THE REJECTION RATE.
!
    err_prob = abs( (prob-sign_level)/sign_level )
!
!   CHECK THAT THE RELATIVE ERROR OF THE REJECTION RATE IS LESS THAN eps .
!
    if ( err_prob<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_permute_cor
! ==============================
!
end program ex1_permute_cor

ex1_phase_scramble_cor.F90

program ex1_phase_scramble_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and PHASE_SCRAMBLE_COR
!   in module Mul_Stat_Procedures for performing a phase-scrambled bootstrap test on a correlation
!   coefficient.
!
!
! LATEST REVISION : 12/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor, phase_scramble_cor, pinvn
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! p       IS THE NUMBER OF OBSERVATIONS OF THE TIME SERIES VECTORS;
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PHASE-SCRAMBLED BOOTSTRAP TEST;
! nsample IS THE NUMBER OF SAMPLES TO EVALUATE THE REJECTION RATE OF THE TEST.
!
    integer(i4b), parameter :: prtunit=6, p=50, nrep=99, nsample=5000
!
! sign_level IS THE SIGNIFICANCE LEVEL OF PHASE-SCRAMBLED BOOTSTRAP TEST;
! eps        IS THE ALLOWED RELATIVE ERROR FOR THE REJECTION RATE;
! b          IS THE LAG-1 AUTOCORRELATION FOR THE AR(1) MODEL USED
!            TO GENERATE THE TIME SERIES.
!
    real(stnd),  parameter :: sign_level=0.05, eps=0.2, b=0.3
!
    character(len=*), parameter :: name_proc='Example 1 of phase_scramble_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)               :: xyn, xycor, prob, err_prob
    real(stnd), dimension(2) :: xstat, ystat
    real(stnd), dimension(p) :: x, y, e
!
    integer(i4b) :: i, j, rej_rate
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   INITIALIZE THE RANDOM GENERATOR.
!
    call random_seed()
!
!   INITIALIZE THE REJECTION RATE FOR THE PERMUTATION TEST.
!
    rej_rate = 0
    first = true
    last  = true
!
!   GENERATE A TIME SERIES USING AN AR(1) MODEL OF THE FORM
!
!               y(i+1) = b*y(i) + e(i)
!
!   WHERE b IS THE SPECIFIED LAG-1 AUTOCORRELATION AND e(I)
!   IS A NORMALLY DISTRIBUTED RANDOM VARIABLE WITH A 0 MEAN
!   AND A VARIANCE OF 1.
!
    call random_number( y(:p) )
    e(:p) = pinvn( y(:p) )
!
    y(1) = e(1)
    do j=2, p
        y(j) = b*y(j-1) + e(j)
    end do
!
    do i=1, nsample
!
!       GENERATE ANOTHER INDEPENDENT TIME SERIES FROM THE SAME AR(1) MODEL.
!
        call random_number( x(:p) )
        e(:p) = pinvn( x(:p) )
!
        x(1) = e(1)
        do j=2, p
            x(j) = b*x(j-1) + e(j)
        end do
!
!       COMPUTE THE CORRELATIONS BETWEEN x AND y
!       FOR THE p OBSERVATIONS.
!
        call comp_cor( x(:p), y(:p), first, last, xstat(:2), ystat(:2), xycor, xyn )
!
!       ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(1)     CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR x(:p).
!
!      xstat(2)     CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR x(:p).
!
!      ystat(1)     CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(:p).
!
!      ystat(2)     CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(:p).
!
!      xycor        CONTAINS THE CORRELATION COEFFICIENT BETWEEN x(:p) AND y(:p).
!
!      xyn          CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA VECTORS
!                   x(:p) AND  y(:p) (xyn=real(p,stnd) ).
!
!
!       NOW COMPUTE A PHASE-SCRAMBLED BOOTSTRAP TEST OF THE CORRELATION
!       BETWEEN x AND y WITH SUBROUTINE phase_scramble_cor WITH nrep SHUFFLES.
!
        call phase_scramble_cor( x(:p), y(:p), xstat(:2), ystat(:2), xycor, prob, nrep=nrep )
!
!       EVALUATE THE REJECTION RATE .
!
        if ( prob<=sign_level ) rej_rate = rej_rate + 1
!
    end do
!
!   NOW COMPUTE THE REJECTION RATE OF THE TEST. IDEALLY FOR THE sign_level
!   SIGNIFICANCE LEVEL, WE WOULD REJECT ONLY THE NULL HYPOTHESIS sign_level %
!   OF THE TIME. IN OTHER WORDS, THE REJECTION RATE MUST BE APPROXIMATELY EQUAL
!   TO THE SIGNIFICANCE LEVEL sign_level .
!
    prob = real( rej_rate, stnd )/real( nsample, stnd )
!
!   COMPUTE THE RELATIVE ERROR OF THE REJECTION RATE.
!
    err_prob = abs( (prob-sign_level)/sign_level )
!
!   CHECK THAT THE RELATIVE ERROR OF THE REJECTION RATE IS LESS THAN eps .
!
    if ( err_prob<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_phase_scramble_cor
! =====================================
!
end program ex1_phase_scramble_cor

ex1_pk_coef.F90

program ex1_pk_coef
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions PK_COEF,
!   FREQ_FUNC and SYMLIN_FILTER in module Time_Series_Procedures for band-pass filtering
!   a time series with a Lanczos filter.
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, zero, half, one, two, pi, true, false, merror, allocate_error,   &
                         pk_coef, freq_func, symlin_filter, init_fft, fft, end_fft
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=2000
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, tmp, fc
    real(stnd), dimension(n)              :: y, y2, y3, freqr
    real(stnd), dimension(:), allocatable :: coef
!
    complex(stnd), dimension(n) :: yc
!
    integer(i4b) :: i, k, k1, k2, pc, nfilt, khalf, kmid
!   
    integer :: iok
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of pk_coef'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
!
!   pc IS THE PERIOD OF OSCILLATION WITH A PEAK RESPONSE NEAR ONE.
!   pc IS EXPRESSED IN NUMBER OF OBSERVATIONS, I.E. pc==32(96) SELECT
!   A PERIOD OF 8 YRS FOR QUARTERLY (MONTHLY) DATA.
!
    pc  = 32
!
!   COMPUTE THE CORRESPONDING CUTOFF FREQUENCY.
!
    fc = one/real( pc, stnd )
!
!   NOW SELECT THE OPTIMAL NUMBER OF FILTER COEFFICIENTS k FOR THE LANCZOS FILTER.
!
    tmp = 2.3*max( real( pc, stnd ), one/(half-fc) )
    k = ceiling( tmp, i4b )
!
!   CHECK IF k IS ODD.
!
    if ( (k/2)*2==k  ) k = k + 1
!
!   ALLOCATE WORK ARRAY FOR THE FILTER COEFFICIENTS.
!
    allocate( coef(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   FUNCTION pk_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- BAND PASS FILTER WITH A PEAK RESPONSE NEAR ONE AT THE PERIOD pc (EG FREQUENCY FREQ=1/pc).
!
    coef(:k) = pk_coef( freq=fc, k=k )
!
!   SUBROUTINE symlin_filter PERFORMS A SYMMETRIC FILTERING OPERATION ON AN IMPUT
!   TIME SERIES (THE ARGUMENT VEC).
!
!   HERE symlin_filter FILTERS THE TIME SERIES .
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE LOST FROM EACH END OF THE SERIES SO THAT
!   NFILT (NFILT= size(VEC) - size(COEF) + 1) TIME OBSERVATIONS ARE RETURNED IN VEC(:NFILT)
!   AND THE REMAINING OBSERVATIONS ARE SET TO ZERO.
!
    call symlin_filter( vec=y2(:n), coef=coef(:k),  nfilt=nfilt )
!
!   NOW COMPUTE THE TRANSFERT FUNCTION freqr(:) OF THE LANCZOS FILTER AT THE FOURIER FREQUENCIES.
!
    call freq_func( nfreq=n, coef=coef(:k), freqr=freqr(:n), four_freq=true )
!
!   APPLY THE FILTER USING THE FFT AND THE CONVOLUTION THEOREM.
!
!   INITIALIZE THE FFT SUBROUTINE.
!
    call init_fft( n )
!
!   TRANSFORM THE TIME SERIES.
!
    yc(:n) = cmplx( y(1:n), zero, kind=stnd )
!
    call fft( yc(:n), forward=true  )
!
!   MULTIPLY THE FOURIER TRANSFORM OF THE TIME SERIES
!   BY THE TRANSFERT FUNCTION OF THE FILTER.
!
    yc(:n) = yc(:n)*freqr(:n)
!
!   INVERT THE SEQUENCE BACK TO GET THE FILTERED TIME SERIES.
!
    call fft( yc(:n), forward=false )
!
    y3(:n) = real( yc(:n),  kind=stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    call end_fft()
!
    deallocate( coef )
!
!   TEST THE ACCURACY OF THE FILTERING OPERATION.
!
    kmid  = ( k + 1 )/2
    khalf = ( k - 1 )/2
    k1    = kmid
    k2    = n - khalf
!
    err = maxval(abs(y3(k1:k2)-y2(:nfilt)))/maxval(abs(y(k1:k2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_pk_coef
! ==========================
!
end program ex1_pk_coef

ex1_power_spectrum.F90

program ex1_power_spectrum
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine POWER_SPECTRUM
!   in module Time_Series_Procedures for computing the power spectrum of a time series.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine MVS in module Stat_Procedures
!    for computing univariate statistics of a time series.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, one, power_spectrum, comp_mvs, print_array
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES AND MUST BE EVEN.
!
    integer(i4b), parameter :: prtunit=6, n=100, psn=(n/2)+1
!
    character(len=*), parameter :: name_proc='Example 1 of power_spectrum'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                   :: err_mean, err_var, xmean, xmean2, xvar, xvar2, xstd, eps, tmp
    real(stnd), dimension(n)     :: x
    real(stnd), dimension(psn,2) :: psx
!
    integer(i4b) :: trend, win, i
!
    logical(lgl) :: smooth, normpsd
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(err_mean) )
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( x(:n) )
!
!   COMPUTE THE POWER SPECTRUM OF THE TIME SERIES x(:n).
!
!   BY DEFAULT, BARTLETT WINDOW IS USED IN THE COMPUTATION OF THE POWER SPECTRUM (i.e. WIN=1).
!   SET WIN=2 FOR RECTANGULAR WINDOW, WIN=3 FOR WELCH WINDOW, WIN=4 FOR HANNING WINDOW
!   OR WIN=5 FOR HAMMING WINDOW.
!   IN ANY CASE, WIN MUST BE GREATER OR EQUAL TO 1 AND LESS OR EQUAL TO 5.
!
    win = 2
!
!   BY DEFAULT, THE MEAN OF THE TIME SERIES IS REMOVED BEFORE THE COMPUTATION
!   OF THE POWER SPECTRUM (i.e. TREND=1).
!   SET TREND=2 FOR REMOVING THE DRIFT OR TREND=3 FOR REMOVING THE LEAST SQUARES LINE
!   FROM THE TIME SERIES BEFORE ESTIMATING THE POWER SPECTRUM.
!   FOR OTHER VALUES OF TREND NOTHING IS DONE BEFORE ESTIMATING THE SPECTRUM.
!
    trend = 0
!
!   ON ENTRY, IF NORMPSD IS SET TO TRUE, THE POWER SPECTRAL DENSITY (PSD) ESTIMATES
!   ARE NORMALIZED IN SUCH A WAY THAT THE TOTAL AREA UNDER THE POWER SPECTRUM IS EQUAL
!   TO THE VARIANCE OF THE TIME SERIES VEC. IF NORMPSD IS SET TO FALSE, THE SUM OF THE
!   PSD ESTIMATES (E.G. sum( PSVEC(2:) ) IS EQUAL TO THE VARIANCE OF THE TIME SERIES.
!   THE DEFAULT IS NORMPSD=true .
!
    normpsd = false
!
!   ON EXIT, PSVEC CONTAINS THE POWER SPECTRAL DENSITY (PSD) ESTIMATES OF VEC
!   AT THE psn FOURIER FREQUENCIES.
!
    call power_spectrum( vec=x(:n), psvec=psx(:psn,2), normpsd=normpsd,   &
                         win=win, trend=trend )
!
!   BUILD UP PERIOD AXIS.
!
    psx(1,1) = -one
    tmp = real( n, stnd )
!
    do i = 1, psn-1
        psx(i+1,1) = tmp/real( i, stnd )
    end do
!
!   PRINT POWER SPECTRUM OF x(:n).
!
    call print_array( psx, title='POWER SPECTRUM', namcol=(/ "PERIOD", "PSD   "/) )
!
!   ESTIMATE THE MEAN AND VARIANCE OF THE SIGNAL THROUGH THE POWER SPECTRUM.
!
    xmean2 = sqrt( psx(1,2) )
    xvar2  = sum( psx(2:psn,2) )
!
!   COMPUTE THE MEAN AND THE VARIANCE WITH SUBROUTINE comp_mvs .
!
    call comp_mvs( x=x(:n), first=true, last=true, xmean=xmean, xvar=xvar, xstd=xstd )
!
!   COMPARE THE TWO SETS OF STATISTICS.
!
    err_mean = abs( (xmean-xmean2)/xmean )
    err_var  = abs( (xvar-xvar2)/xvar )
!
!   TEST THE ACCURACY OF THE STATISTICS.
!
    if ( max(err_mean,err_var)<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_power_spectrum
! =================================
!
end program ex1_power_spectrum

ex1_print_array.F90

program ex1_print_array
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine PRINT_ARRAY
!   in module Print_Procedures for printing real vectors and matrices.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, print_array
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE MATRIX.
! n IS ALSO THE DIMENSION OF THE VECTOR.
!
    integer(i4b), parameter :: prtunit=6, m=20, n=5
!
    character(len=*), parameter :: name_proc='Example 1 of print_array'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)  :: real_matrix(m,n), real_vector(m)
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM REAL MATRIX .
!
    call random_number( real_matrix )
!
!   GENERATE A RANDOM REAL VECTOR .
!
    call random_number( real_vector )
!
!   PRINT THE RANDOM REAL MATRIX .
!
    call print_array( real_matrix, title='real_matrix' )
!
!   PRINT THE RANDOM REAL VECTOR .
!
    call print_array( real_vector, title='real_vector' )
!
!
! END OF PROGRAM ex1_print_array
! ==============================
!
end program ex1_print_array

ex1_probbeta.F90

program ex1_probbeta
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBBETA, PINVSTUDENT
!   in module Prob_Procedures.
!
!
! LATEST REVISION : 22/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, half, probbeta, pinvstudent
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probbeta'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, t, x
    real(stnd)                 :: err, df, a
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF STUDENT T-DISTRIBUTION.
!
    df = 50_stnd
!
!   GENERATE A RANDOM PROBABILITY MATRIX p .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE TWO-TAIL QUANTILES T OF STUDENT T-DISTRIBUTION
!   WITH df DEGREES OF FREEDOM.
!
    t(:n,:m) = pinvstudent( p(:n,:m), df )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probbeta FUNCTION.
!
    x(:n,:m) =  df/(df+ t(:n,:m)*t(:n,:m) )
    a = half*df
!
    p2(:n,:m) = probbeta( x(:n,:m), a, half )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probbeta
! ===========================
!
end program ex1_probbeta

ex1_probn.F90

program ex1_probn
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBN and PINVN
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 15/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, probn, pinvn
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probn'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x
    real(stnd)                 :: err
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM PROBABILITY MATRIX p .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE NORMAL DEVIATES CORRESPONDING TO LOWER TAIL AREAS OF P .
!
    x(:n,:m) = pinvn( p(:n,:m) ) 
!
!   RECOMPUTE THE PROBABILITIES FROM THE NORMAL DEVIATES .
!
    p2(:n,:m) = probn( x(:n,:m), upper=false )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probn
! ========================
!
end program ex1_probn

ex1_probn2.F90

program ex1_probn2
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBN2 and PINVN2
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 15/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, extd, false, probn2, pinvn2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(extd), parameter :: eps = 1.0e-6_extd
!
    character(len=*), parameter :: name_proc='Example 1 of probn2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(extd), dimension(n,m) :: p, p2, x
    real(extd)                 :: err
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM PROBABILITY MATRIX p .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE NORMAL DEVIATES CORRESPONDING TO LOWER TAIL AREAS OF P .
!
    x(:n,:m) = pinvn2( p(:n,:m) ) 
!
!   RECOMPUTE THE PROBABILITIES FROM THE NORMAL DEVIATES .
!
    p2(:n,:m) = probn2( x(:n,:m), upper=false )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probn2
! =========================
!
end program ex1_probn2

ex1_probq.F90

program ex1_probq
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBQ, PINVQ
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, probq, pinvq
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-3_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probq'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x2
    real(stnd)                 :: err
!
    integer(i4b) :: ndf
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF CHI-SQUARED DISTRIBUTION .
!
    ndf = 50_i4b
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:m,:n) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES x2(:m,:n) OF CHI-SQUARED DISTRIBUTION WITH ndf DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:m,:n) .
!
    x2(:n,:m) = pinvq( p(:n,:m), ndf )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probq FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probq( x2(:n,:m), ndf, upper=upper )
!
!   CHECK THAT p(:n,:m) AND p2(:n,:m) AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probq
! ========================
!
end program ex1_probq

ex1_probq2.F90

program ex1_probq2
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBQ2, PINVQ2
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, probq2, pinvq2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-3_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probq2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x2
    real(stnd)                 :: err, df
!
    integer(i4b) :: i, j
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF CHI-SQUARED DISTRIBUTION.
!   DF IS NOT NECESSARILY AN INTEGER.
!
    df = 50.5_stnd
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:m,:n) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES x2(:m,:n) OF CHI-SQUARED DISTRIBUTION WITH df DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:m,:n) .
!
    x2(:n,:m) = pinvq2( p(:n,:m), df )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probq2 FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probq2( x2(:n,:m), df, upper=upper )
!
!   CHECK THAT p(:n,:m) AND p2(:n,:m) AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probq2
! =========================
!
end program ex1_probq2

ex1_probq3.F90

program ex1_probq3
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBQ3, PINVQ2
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, probq3, pinvq2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-3_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probq3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x2
    real(stnd)                 :: err, df
!
    integer(i4b) :: i, j
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF CHI-SQUARED DISTRIBUTION.
!   DF IS NOT NECESSARILY AN INTEGER.
!
    df = 50.5_stnd
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:m,:n) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES x2(:m,:n) OF CHI-SQUARED DISTRIBUTION WITH df DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:m,:n) .
!
    x2(:n,:m) = pinvq2( p(:n,:m), df )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probq3 FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probq3( x2(:n,:m), df, upper=upper )
!
!   CHECK THAT p(:n,:m) AND p2(:n,:m) AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probq3
! =========================
!
end program ex1_probq3

ex1_probstudent.F90

program ex1_probstudent
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBSTUDENT, PINVSTUDENT
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, probstudent, pinvstudent
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probstudent'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, t
    real(stnd)                 :: err, df
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF STUDENT T-DISTRIBUTION .
!
    df = 50._stnd
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:n,:m) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE TWO-TAIL QUANTILES t OF STUDENT T-DISTRIBUTION 
!   WITH df DEGREES OF FREEDOM CORRESPONDING TO AREAS OF p(:n,:m) .
!
    t(:n,:m) = pinvstudent( p(:n,:m), df )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probstudent FUNCTION.
!
    p2(:n,:m) = probstudent( t(:n,:m), df )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probstudent
! ==============================
!
end program ex1_probstudent

ex1_probt.F90

program ex1_probt
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBT, PINVT
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, probt, pinvt
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 1 of probt'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, t
    real(stnd)                 :: err
!
    integer(i4b) :: ndf
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF STUDENT T-DISTRIBUTION .
!
    ndf = 50_i4b
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:,:) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES T OF STUDENT T-DISTRIBUTION WITH ndf DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:,:) .
!
    t(:n,:m) = pinvt( p(:n,:m), ndf )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probt FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probt( t(:n,:m), ndf, upper=upper )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_probt
! ========================
!
end program ex1_probt

ex1_qlp_cmp.F90

program ex1_qlp_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a partial QLP decomposition
!   using subroutine QLP_CMP in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,  &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,    &
                         gen_random_mat, qlp_cmp, singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn),
! nqlp IS THE TARGET RANK OF THE PARTIAL QLP FACORIZATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=300, nqlp=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of qlp_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, anorm, lnorm, tmp, tmp2, &
                  relerr, relerr2, abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, qmat, lmat, pmat, res, id
    real(stnd), dimension(:),   allocatable :: singval0, lval, beta, tau
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: blk_size, nover, i, mat_type
!
    logical(lgl) :: random_qr, truncated_qr, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QLP DECOMPOSITION OF A m-BY-n REAL MATRIX USING A RANDOMIZED QR
!               DETERMINISTIC ALGORITHM IN THE FIRST PHASE OF THE ALGORITHM.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QLP FACTORIZATION COMPARED TO THE BEST SVD
!   APPROXIMATION.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE QLP DECOMPOSITION.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE QLP ALGORITHM.
!
!   DETERMINE IF A RANDOMIZED PARTIAL QR ALGORITHM IS USED IN THE FIRST PHASE OF THE QLP DECOMPOSITION.
!
    random_qr = true
!
!   DETERMINE IF A RANDOMIZED PARTIAL AND TRUNCATED QR ALGORITHM IS USED IN THE FIRST PHASE OF
!   THE QLP ALGORITHM.
!
    truncated_qr = false
!
!   DETERMINE THE BLOCK SIZE USED IN THE RANDOMIZED PARTIAL QR PHASE OF THE ALGORITHM IF
!   random_qr IS SET TO true.
!
    blk_size = 60_i4b
!    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QR PHASE OF THE ALGORITHM IF
!   random_qr IS SET TO true.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), qmat(m,nqlp), pmat(nqlp,n), lmat(nqlp,nqlp), &
              singval0(nsvd0), beta(nqlp), tau(nqlp), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
    call singval_sort( 'D', singval0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE GENERATED MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   qlp_cmp COMPUTES A PARTIAL QLP DECOMPOSITION OF A REAL m-BY-n MATRIX a. 
!   THE PARTIAL QLP IS WRITTEN
!
!                       a  ≈ Q * L * P
!
!   WHERE L IS AN nqlp-BY-nqlp LOWER TRIANGULAR MATRIX WHOSE DIAGONAL ELEMENTS (IN ABSOLUTE VALUE) ARE
!   GOOD APPROXIMATIONS OF THE nqlp LARGEST SINGULAR VALUES OF a SORTED IN DECREASING ORDER (E.G,. THE
!   SO-CALLED L-VALUES), Q IS AN m-BY-nqlp ORTHONORMAL MATRIX, AND L IS AN nqlp-BY-n ORTHONORMAL MATRIX
!   STORED ROWWISE.
!
    call qlp_cmp( a(:m,:n), beta(:nqlp), tau(:nqlp), lmat=lmat(:nqlp,:nqlp),     &
                  qmat=qmat(:m,:nqlp), pmat=pmat(:nqlp,:n), random_qr=random_qr, &
                  truncated_qr=truncated_qr, blk_size=blk_size, nover=nover )
!
!   THE ROUTINE RETURNS THE QLP FACTORIZATION IN FACTORED FORM IN ARRAYS a, beta AND tau AND EXPLICITLY
!   IF THE OPTIONAL ARRAY ARGUMENTS lmat, qmat AND pmat ARE SPECIFIED IN INPUT OF qlp_cmp SUBROUTINE.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR IN FROBENIUS NORM
!   FOR THE PARTIAL QLP DECOMPOSITION OF RANK nqlp.
!
    lnorm = norm( lmat(:nqlp,:nqlp) )
!
    tmp = one -  (lnorm/anorm)**2
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE BEST RELATIVE ERROR IN FROBENIUS NORM
!   FOR A PARTIAL SVD DECOMPOSITION OF RANK nqlp.
!
    if ( nsvd0>nqlp ) then
        relerr2 = norm( singval0(nqlp+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE BEST AND QLP RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE QLP FACTORS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( lval(nqlp), res(m,nqlp), id(nqlp,nqlp), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       GET THE L-VALUES.
!
        do i = 1_i4b, nqlp
            lval(i) = abs( lmat(i,i) )
        end do
!
!       COMPUTE ERRORS FOR THE L-VALUES AS ESTIMATES OF THE SINGULAR VALUES.
!
        i = min( nqlp, nsvd0 )
!
        id(:i,1_i4b) = singval0(:i)
        id(i+1_i4b:nqlp,1_i4b) = zero
!
        where( id(:nqlp,1_i4b)/=zero )
            res(:nqlp,1_i4b) = id(:nqlp,1_i4b)
        elsewhere
            res(:nqlp,1_i4b) = one
        end where
!
!       ABSOLUTE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        abs_err = maxval( abs( lval(:nqlp) - id(:nqlp,1_i4b) ) )
!
!       RELATIVE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        rel_err = maxval( abs( (lval(:nqlp) - id(:nqlp,1_i4b))/res(:nqlp,1_i4b) ) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqlp)**(t)*q(:m,:nqlp).
!
        call unit_matrix( id(:nqlp,:nqlp) )
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( transpose(qmat), qmat ) )
!
        err1 = maxval( res(:nqlp,:nqlp) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - p(:nqlp,:n)*p(:nqlp,:n)**(t).
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( pmat, transpose(pmat) ) )
!
        err2 = maxval( res(:nqlp,:nqlp) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( lval, res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, beta, tau, lmat, qmat, pmat )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QLP approximation                                = ', &
                      nqlp
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-QLP||_F / ||A||_F         = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Absolute accuracy of the computed L-values    = ', abs_err
        write (prtunit,*) 'Relative accuracy of the computed L-values    = ', rel_err
        write (prtunit,*) 'Orthogonality of the computed Q matrix        = ', err1
        write (prtunit,*) 'Orthogonality of the computed P matrix        = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a partial QLP approximation of rank ', nqlp, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_qlp_cmp
! ==========================
!
end program ex1_qlp_cmp

ex1_qlp_cmp2.F90

program ex1_qlp_cmp2
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a partial QLP decomposition
!   using subroutine QLP_CMP2 in module SVD_Procedures.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,  &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,    &
                         gen_random_mat, qlp_cmp2, singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn),
! nqlp IS THE TARGET RANK OF THE PARTIAL QLP FACORIZATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=300, nqlp=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of qlp_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, anorm, lnorm, tmp, tmp2, &
                  relerr, relerr2, abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, qmat, lmat, pmat, res, id
    real(stnd), dimension(:),   allocatable :: singval0, lval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter_qrql, blk_size, nover, i, mat_type
!
    logical(lgl) :: random_qr, truncated_qr, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QLP DECOMPOSITION OF A m-BY-n REAL MATRIX USING A RANDOMIZED QR
!               DETERMINISTIC ALGORITHM IN THE FIRST STAGE OF THE ALGORITHM AND QR-QL ITERATIONS
!               IN A FINAL STAGE FOR IMPROVING THE ACCURACY OF THE L-VALUES.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QLP FACTORIZATION COMPARED TO THE BEST SVD
!   APPROXIMATION.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE QLP DECOMPOSITION.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE QLP ALGORITHM.
!
!   CHOOSE THE NUMBER OF QR-QL ITERATIONS TO BE PERFORMED FOR IMPROVING THE QUALITY OF THE L-VALUES.
!
    niter_qrql = 4_i4b
!
!   DETERMINE IF A RANDOMIZED PARTIAL QR ALGORITHM IS USED IN THE FIRST PHASE OF THE QLP DECOMPOSITION.
!
    random_qr = true
!
!   DETERMINE IF A RANDOMIZED PARTIAL AND TRUNCATED QR ALGORITHM IS USED IN THE FIRST PHASE OF
!   THE QLP ALGORITHM.
!
    truncated_qr = false
!
!   DETERMINE THE BLOCK SIZE USED IN THE RANDOMIZED PARTIAL QR PHASE OF THE ALGORITHM IF
!   random_qr IS SET TO true.
!
    blk_size = 60_i4b
!    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QR PHASE OF THE ALGORITHM IF
!   random_qr IS SET TO true.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), qmat(m,nqlp), pmat(nqlp,n), lmat(nqlp,nqlp), &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
    call singval_sort( 'D', singval0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE GENERATED MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   qlp_cmp2 COMPUTES A PARTIAL QLP DECOMPOSITION OF A REAL m-BY-n MATRIX a. 
!   THE PARTIAL QLP IS WRITTEN
!
!                       a  ≈ Q * L * P
!
!   WHERE L IS AN nqlp-BY-nqlp LOWER TRIANGULAR MATRIX WHOSE DIAGONAL ELEMENTS (IN ABSOLUTE VALUE) ARE
!   GOOD APPROXIMATIONS OF THE nqlp LARGEST SINGULAR VALUES OF a SORTED IN DECREASING ORDER (E.G,. THE
!   SO-CALLED L-VALUES), Q IS AN m-BY-nqlp ORTHONORMAL MATRIX, AND L IS AN nqlp-BY-n ORTHONORMAL MATRIX
!   STORED ROWWISE. THE QUALITY OF L-VALUES CAN BE IMPROVED BY ADDITIONAL QR-QL ITERATIONS IF REQUIRED.
!
    call qlp_cmp2( a(:m,:n), lmat(:nqlp,:nqlp), qmat(:m,:nqlp), pmat(:nqlp,:n),           &
                   niter_qrql=niter_qrql, random_qr=random_qr, truncated_qr=truncated_qr, &
                   blk_size=blk_size, nover=nover )
!
!   THE ROUTINE RETURNS THE QLP FACTORIZATION EXPLICITLY IN THE ARRAY ARGUMENTS lmat, qmat AND pmat SPECIFIED
!   IN INPUT OF qlp_cmp2 SUBROUTINE.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR IN FROBENIUS NORM
!   FOR THE PARTIAL QLP DECOMPOSITION OF RANK nqlp.
!
    lnorm = norm( lmat(:nqlp,:nqlp) )
!
    tmp = one -  (lnorm/anorm)**2
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE BEST RELATIVE ERROR IN FROBENIUS NORM
!   FOR A PARTIAL SVD DECOMPOSITION OF RANK nqlp.
!
    if ( nsvd0>nqlp ) then
        relerr2 = norm( singval0(nqlp+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE BEST AND QLP RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE QLP FACTORS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( lval(nqlp), res(m,nqlp), id(nqlp,nqlp), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       GET THE L-VALUES.
!
        do i = 1_i4b, nqlp
            lval(i) = abs( lmat(i,i) )
        end do
!
!       COMPUTE ERRORS FOR THE L-VALUES AS ESTIMATES OF THE SINGULAR VALUES.
!
        i = min( nqlp, nsvd0 )
!
        id(:i,1_i4b) = singval0(:i)
        id(i+1_i4b:nqlp,1_i4b) = zero
!
        where( id(:nqlp,1_i4b)/=zero )
            res(:nqlp,1_i4b) = id(:nqlp,1_i4b)
        elsewhere
            res(:nqlp,1_i4b) = one
        end where
!
!       ABSOLUTE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        abs_err = maxval( abs( lval(:nqlp) - id(:nqlp,1_i4b) ) )
!
!       RELATIVE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        rel_err = maxval( abs( (lval(:nqlp) - id(:nqlp,1_i4b))/res(:nqlp,1_i4b) ) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqlp)**(t)*q(:m,:nqlp).
!
        call unit_matrix( id(:nqlp,:nqlp) )
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( transpose(qmat), qmat ) )
!
        err1 = maxval( res(:nqlp,:nqlp) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - p(:nqlp,:n)*p(:nqlp,:n)**(t).
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( pmat, transpose(pmat) ) )
!
        err2 = maxval( res(:nqlp,:nqlp) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( lval, res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, lmat, qmat, pmat )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the partial QLP approximation                                = ', &
                      nqlp
    write (prtunit,*) 'Number of QR-QL iterations performed                                 = ', &
                      niter_qrql
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-QLP||_F / ||A||_F         = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Absolute accuracy of the computed L-values    = ', abs_err
        write (prtunit,*) 'Relative accuracy of the computed L-values    = ', rel_err
        write (prtunit,*) 'Orthogonality of the computed Q matrix        = ', err1
        write (prtunit,*) 'Orthogonality of the computed P matrix        = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a partial QLP approximation of rank ', nqlp, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_qlp_cmp2
! ===========================
!
end program ex1_qlp_cmp2

ex1_qr_cmp.F90

program ex1_qr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines QR_CMP and 
!   ORTHO_GEN_QR in module QR_Procedures for computing a QR decomposition of
!   a real matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50, qr_cmp, ortho_gen_qr,   &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of qr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, ulp, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, resid2, norma
    real(stnd), allocatable, dimension(:,:) :: a, q, r, resid
!
    integer(i4b) :: k, j, l
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A FULL QR DECOMPOSITION OF A DATA MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    k = min( m, n )
    l = max( m, n )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), diagr(k), beta(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( r(k,n), q(m,l), resid(m,l), resid2(n), norma(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE QR DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp.
!
    call qr_cmp( a(:m,:n), diagr(:k), beta(:k) )
!
!   qr_cmp COMPUTES AN ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX
!   BY THE HOUSEOLDER METHOD. THE MATRIX IS ASSUMED OF FULL RANK. THE ROUTINE
!   COMPUTES A QR FACTORIZATION OF a AS:
!
!                     a = Q * R
!
!   Q IS A m-BY-m ORTHOGONAL MATRIX AND R IS A m-BY-n UPPER TRIANGULAR OR
!   TRAPEZOIDAL MATRIX.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS ORTHOGONAL
!   FACTORIZATION. 
!
!   THE MATRIX Q IS REPRESENTED AS A PRODUCT OF ELEMENTARY REFLECTORS
!
!            Q = h(1)*h(2)* ... *h(k), WHERE k = min( size(a,1) , size(a,2) )
!
!   EACH h(i) HAS THE FORM
!
!            h(i) = I + BETA * ( V * V' ) ,
!                      
!   WHERE BETA IS A REAL SCALAR AND V IS A REAL m-ELEMENT VECTOR WITH V(1:i-1) = 0.
!   V(i:m) IS STORED ON EXIT IN a(i:m,i) AND BETA IN beta(i).
!                      
!   THE ELEMENTS ABOVE THE DIAGONAL IN THE ARRAY a CONTAIN THE
!   CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!   OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr ON EXIT.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       NOW, RESTORE TRIANGULAR FACTOR R OF QR DECOMPOSITION OF DATA MATRIX a
!       IN MATRIX r(:k,:n) .
!
        do j = 1_i4b, k
!
            r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
            r(j,j)             = diagr(j)
            r(j+1_i4b:k,j)     = zero
!
        end do
!
        do j = k+1_i4b, n
!
            r(1_i4b:k,j) = a(1_i4b:k,j)
!
        end do
!
        q(:m,:k) = a(:m,:k)
!
!       GENERATE ORTHOGONAL MATRIX Q OF THE QR DECOMPOSITION OF DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!       a IS ASSUMED OF FULL RANK.
!
        call ortho_gen_qr( q(:m,:m), beta(:k) )
!
!       ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(k)
!
!       AS RETURNED BY qr_cmp OR qr_cmp2.
!
!       THE SIZE OF beta DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW q(:m,:krank) IS AN ORTHONORMAL BASIS FOR THE RANGE OF a AND q(:m,krank+1:m) IS
!       AN ORTHONORMAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!
!       RESTORE THE INPUT MATRIX IN a(:m,:n) .
!
        a(:m,:n) = resid(:m,:n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n) - q(:m,:k)*r(:k,:n).
!
        resid(:m,:n) = a(:m,:n) - matmul( q(:m,:k), r(:k,:n) )
        resid2(:n)   = norm( resid(:m,:n), dim=2_i4b )
        norma(:n)    = norm( a(:m,:n), dim=2_i4b )
        err1         = maxval( resid2(:n) / norma(:n) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q'*Q.
!
        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(q(:m,:m)), q(:m,:m) ) )
!
        do j = 1, m
            resid(j,j) =  resid(j,j) - one
        end do
!
        err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF a(:m,:n) AND ITS ORTHOGONAL COMPLEMENT q(:m,n+1:m).
!
        if ( m>n ) then
!
            resid(:n,n+1_i4b:m) = matmul( transpose(a(:m,:n)), q(:m,n+1_i4b:m) )
            err3 = maxval( abs( resid(:n,n+1_i4b:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( r, q, resid, resid2, norma )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the QR decomposition       &
                          &                                = ', err1
        write (prtunit,*) 'Orthogonality of the Q matrix          &
                          &                                = ', err2
!
        if ( m>n ) then
            write (prtunit,*) 'Orthogonality of the range of the matrix&
                              & and its orthogonal complement = ', err3
        end if
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a QR decomposition of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_qr_cmp
! =========================
!
end program ex1_qr_cmp

ex1_qr_cmp2.F90

program ex1_qr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines QR_CMP2 and 
!   ORTHO_GEN_QR in module QR_Procedures for computing a QR decomposition with
!   column pivoting of a real matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50, qr_cmp2, ortho_gen_qr,   &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of qr_cmp2'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err1_col, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, resid2, norma
    real(stnd), allocatable, dimension(:,:) :: a, a2, r, resid
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A FULL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A RANDOM DATA MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED ON ALL COLUMNS OR ONLY PARTIALLY.
!
    krank = 0
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    if ( do_test ) then
!
        l = max( m, n )
!
    else
!
        l = n
!
    end if
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,l), diagr(mn), beta(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), r(mn,n), resid(m,l), resid2(n), norma(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A FULL QR DECOMPOSITION WITH COLUMN PIVOTING OF RANDOM DATA MATRIX a
!   WITH SUBROUTINE qr_cmp2.
!
    call qr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol )
!
!    call qr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank )
!
!   qr_cmp2 COMPUTES A FULL ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX.
!   THE MATRIX MAY BE RANK-DEFICIENT.
!
!   ON INPUT, krank=k, IMPLIES THAT THE FIRST k COLUMNS OF MATRIX a ARE
!   TO BE FORCED INTO THE BASIS. PIVOTING IS PERFORMED ONLY ON THE LAST n-k
!   COLUMNS OF a.
!
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED AT ALL. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a. THUS, qr_cmp2 IS MORE FLEXIBLE
!   THAN partial_qr_cmp, WHICH PERFORMS THE SAME TASKS, BUT IN WHICH PIVOTING IS
!   ALWAYS PERFORMED ON ALL COLUMNS OF a .
!
!   HERE THE ROUTINE FIRST COMPUTES A FULL QR FACTORIZATION WITH PIVOTING ON ALL
!   COLUMNS OF a (E.G., krank=0) AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF qr_cmp2.
!
!   IN OTHER WORDS, ON EXIT, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a
!       ( IN THE 1-NORM) ARE PERFORMED. tol IS THEN USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol. IF tol=0 IS SPECIFIED
!       THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   AGAIN, THIS DIFFERS FROM THE COMPUTATIONS DONE IN partial_qr_cmp. IN partial_qr_cmp
!   ROUTINE, IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ THE CALCULATIONS TO DETERMINE
!   THE CONDITION NUMBER OF R OR THE TESTS ON THE DIAGONAL OF R ARE NOT PERFORMED AND
!   THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   FURTHERMORE, IF tol IS PRESENT AND IS IN [0,1[ IN THE CALL OF qr_cmp2, THE CONDITION
!   NUMBER OF a IS COMPUTED IN ALL CASES AND ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER
!   IS RETURNED IN tol. ON THE OTHER HAND, THE CONDITION NUMBER OF a IS NOT COMPUTED AND RETURNED
!   IF tol=0 IN partial_qr_cmp ROUTINE.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT, THEN R22 IS CONSIDERED
!   TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY ORTHOGONAL TRANSFORMATIONS
!   FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, qr_cmp2 COMPUTES ONLY
!   A QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G., THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, qr_cmp2 COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a
!   AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       RESTORE TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX r(:mn,:n) .
!
        do j = 1_i4b, mn
!
            r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
            r(j,j)             = diagr(j)
            r(j+1_i4b:mn,j)    = zero
!
        end do
!
        do j = mn+1_i4b, n
!
            r(1_i4b:mn,j) = a(1_i4b:mn,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!       a IS NOT ASSUMED OF FULL RANK.
!
        call ortho_gen_qr( a(:m,:m), beta(:krank) )
!
!       ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS A PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(k)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp AND partial_rqr_cmp2.
!
!       THE SIZE OF beta DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW a(:m,:krank) IS AN ORTHONORMAL BASIS FOR THE RANGE OF a AND a(:m,krank+1:m) IS
!       AN ORTHONORMAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!
!       APPLY PERMUTATION TO a .
!
        do j = 1_i4b, n
!
            a2(:m,j) = resid(:m,ip(j))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*P - Q(:m,:krank)*R(:krank,:n).
!
        resid(:m,:n) = a2(:m,:n) - matmul( a(:m,:krank), r(:krank,:n) )
        resid2(:n)   = norm( resid(:m,:n), dim=2_i4b )
        norma(:n)    = norm( a2(:m,:n), dim=2_i4b )
!
        err1_col     = maxval( resid2(:n) / norma(:n) )
        err1         = norm( resid2(:n) )/ norm( norma(:n) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q'*Q.
!
        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
!
        do j = 1, m
            resid(j,j) =  resid(j,j) - one
        end do
!
        err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a2(:m,:krank)), a(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, r, resid, resid2, norma )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Estimated rank of the matrix            &
                          &                                        = ', krank
!
        if ( krank/=mn ) then
            write (prtunit,*) 'Indices of linearly dependent columns   &
                              &                                        = ', ip(krank+1:n)
        end if
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &||A - Q*R||/||A||                       = ', err1
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &max( ||A(:,i) - Q*R(:,i)||/||A(:,i)|| ) = ', err1_col
        write (prtunit,*) 'Orthogonality of the Q matrix           &
                          &                                        = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the matrix&
                              & and its orthogonal complement          = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip )
!
!
! END OF PROGRAM ex1_qr_cmp2
! ==========================
!
end program ex1_qr_cmp2

ex1_qr_solve.F90

program ex1_qr_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QR_CMP in module QR_Procedures
!   and QR_SOLVE in module LLSQ_Procedures for solving linear least squares problems of full
!   rank using a QR decomposition.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c500, allocate_error, merror, &
                         qr_cmp, qr_solve, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX,
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX AND
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n ), nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c500
!
    character(len=*), parameter :: name_proc='Example 1 of qr_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, anorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, rnorm
    real(stnd), allocatable, dimension(:,:) :: a, a2, b, x, res
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION OF THE COEFFICIENT MATRIX
!               AND SEVERAL RIGHT HAND-SIDES. THE COEFFICIENT MATRIX IS
!               ASSUMED OF FULL RANK, BUT BOTH m>=n OR m<n ARE PERMITTED.
!
!               COMPUTE SOLUTION MATRIX x FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF RESIDUAL MATRIX MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), x(n,nrhs), diagr(mn), beta(mn), rnorm(nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), res(nrhs,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A QR FACTORIZATION OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp.
!
    call qr_cmp( a(:m,:n), diagr(:mn), beta(:mn) )
!
!   qr_cmp COMPUTES AN ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX
!   a . a IS ASSUMED OF FULL RANK. THE ROUTINE COMPUTES A QR FACTORIZATION
!   OF a AS:
!
!                     a = Q * R
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS ORTHOGONAL
!   FACTORIZATION. 
!
!   THE MATRIX Q IS REPRESENTED AS A PRODUCT OF ELEMENTARY REFLECTORS
!
!            Q = h(1)*h(2)* ... *h(k), WHERE k = min( size(a,1) , size(a,2) )
!
!   EACH h(i) HAS THE FORM
!
!            h(i) = I + BETA * ( V * V' ) ,
!                      
!   WHERE BETA IS A REAL SCALAR AND V IS A REAL m-ELEMENTS VECTOR WITH V(1:i-1) = 0.
!   V(i:m) IS STORED ON EXIT IN a(i:m,i) AND BETA IN beta(i).
!                      
!   THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!   CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!   OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   NEXT, COMPUTE THE SOLUTION MATRIX FOR LINEAR LEAST SQUARES SYSTEM
!
!                   a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   WITH SUBROUTINE qr_solve AND THE QR DECOMPOSITION COMPUTED BY qr_cmp.
!
    call qr_solve( a(:m,:n), diagr(:mn), beta(:mn), b(:m,:nrhs), x(:n,:nrhs),  &
                   rnorm=rnorm(:nrhs), comp_resid=comp_resid )
!
!   qr_solve COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING A QR FACTORIZATION COMPUTED BY qr_cmp. a IS AN m-BY-n MATRIX
!   WHICH IS ASSUMED OF FULL RANK, BUT BOTH m>=n OR n>m ARE PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL ARRAY ARGUMENT rnorm IN THE CALL OF qr_solve.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        res(:nrhs,:n) = matmul( transpose( b(:m,:nrhs) ), a2(:m,:n) )
!
        err1 = maxval( abs(res(:nrhs,:n)) )/anorm
!
!       CHECK THE NORMS OF THE COLUMNS OF RESIDUAL MATRIX.
!
        err2 = maxval( abs( norm( b(:m,:nrhs), dim=2_i4b ) - rnorm(:nrhs) ) )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) '2-norm of residual vectors ||a*x(:,i)-b(:,i)|| = ', rnorm(:nrhs)
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, rnorm )
!
!
! END OF PROGRAM ex1_qr_solve
! ===========================
!
end program ex1_qr_solve

ex1_qr_solve2.F90

program ex1_qr_solve2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QR_CMP2 in module QR_Procedures
!   and QR_SOLVE2 in module LLSQ_Procedures for solving full or deficient linear least squares
!   problems using a complete orthogonal decomposition of the coefficient matrix.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c500, allocate_error, merror, &
                         qr_cmp2, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX,
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX AND
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n ), nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c500
!
    character(len=*), parameter :: name_proc='Example 1 of qr_solve2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, tau, rnorm
    real(stnd), allocatable, dimension(:,:) :: a, a2, b, x, res
!
    integer(i4b)                            :: krank, j, l, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX
!               AND SEVERAL RIGHT HAND-SIDES. THE COEFFICIENT MATRIX CAN BE RANK DEFICIENT
!               AND BOTH m>=n OR m<n ARE PERMITTED.
!
!               COMPUTE SOLUTION MATRIX x FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED AND
!   IF RESIDUAL MATRIX MUST BE COMPUTED.
!
    krank = 0
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), x(n,nrhs), diagr(mn), beta(mn), tau(mn),  &
              rnorm(nrhs), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), res(nrhs,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A COMPLETE ORTHOGONAL DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp2.
!
    call qr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol, tau=tau(:mn) )
!
!   qr_cmp2 COMPUTES A QR OR COMPLETE ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX.
!   THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE COMPUTES A COMPLETE ORTHOGONAL FACTORIZATION OF a.
!
!   A QR FACTORIZATION WITH COLUMN PIVOTING OF a IS FIRST COMPUTED AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF qr_cmp2.
!
!   IN OTHER WORDS, ON EXIT, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON INPUT, IF krank=k, THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED
!   INTO THE BASIS. PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a
!       ( IN THE 1-NORM) ARE PERFORMED. tol IS THEN USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol. IF tol=0 IS SPECIFIED
!       THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   FURTHERMORE, IF tol IS PRESENT AND IS IN [0,1[ IN THE CALL OF qr_cmp2, THE CONDITION
!   NUMBER OF a IS COMPUTED IN ALL CASES AND ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER
!   IS RETURNED IN tol.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT IN THE CALL OF qr_cmp2,
!   THEN R22 IS CONSIDERED TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY
!   ORTHOGONAL TRANSFORMATIONS FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL
!   FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, qr_cmp2 COMPUTES ONLY
!   A FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G., THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, qr_cmp2 COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a
!   AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!                      
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!                      
!
!   NEXT, COMPUTE THE SOLUTION MATRIX FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   WITH SUBROUTINE qr_solve2 AND THE COMPLETE ORTHOGONAL DECOMPOSITION COMPUTED BY qr_cmp2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m,:nrhs), x(:n,:nrhs),  &
                    tau=tau(:mn), rnorm=rnorm(:nrhs), comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING A QR FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a COMPUTED BY qr_cmp2. a IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. m>=n OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL ARRAY ARGUMENT rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF qr_cmp2 AND qr_solve2 SUBROUTINES. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        res(:nrhs,:n) = matmul( transpose( b(:m,:nrhs) ), a2(:m,:n) )
!
        err1 = maxval( abs(res(:nrhs,:n)) )/anorm
!
!       CHECK THE NORMS OF THE COLUMNS OF RESIDUAL MATRIX.
!
        err2 = maxval( abs( norm( b(:m,:nrhs), dim=2_i4b ) - rnorm(:nrhs) ) )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vectors ||a*x(:,i)-b(:,i)|| = ', rnorm(:nrhs)
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, tau, rnorm, ip )
!
!
! END OF PROGRAM ex1_qr_solve2
! ============================
!
end program ex1_qr_solve2

ex1_quick_sort.F90

program ex1_quick_sort
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QUICK_SORT
!   in module Sort_Procedures for sorting real or integer sequences by a
!   quick sort algorithm.
!
!
! LATEST REVISION : 29/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, arth, quick_sort
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE ORDER OF THE SEQUENCES.
!
    integer(i4b), parameter :: prtunit=6, n=100
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n) :: x
!
    integer(i4b)               :: i, i1, i2, j, k
    integer(i4b), dimension(n) :: y
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of quick_sort'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE RANDOM REAL DATA TO SORT.
!
    call random_number( x )
!
!   GENERATE A RANDOM ORDERING OF THE INTEGERS 1 TO n.
!
    y = arth( 1_i4b, 1_i4b, n ) 
!    
!   STARTING AT THE END, SWAP THE CURRENT LAST INDICATOR WITH ONE
!   RANDOMLY CHOSEN FROM THOSE PRECEEDING IT.

    do i = n, 2, -1
        j = 1 + i * x(i)
        if (j < i) then
            k    = y(i)
            y(i) = y(j)
            y(j) = k
        end if
    end do
!
!   EXAMPLE 1 : SORT THE REAL DATA IN ASCENDING ORDER.
!
    call quick_sort( x  )
!
!   CHECK THAT THE SORTED ARRAY IS NON-DECREASING.
!
    i1 = count( x(1:n-1) > x(2:n) )
!
!   EXAMPLE 2 : SORT THE INTEGER DATA IN DECREASING ORDER.
!
    call quick_sort( y, ascending=false )
!
!   CHECK THAT THE SORTED ARRAY IS NON-INCREASING.
!
    i2 = count( y(1:n-1) < y(2:n) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( i1==0 .and. i2==0 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_quick_sort
! =============================
!
end program ex1_quick_sort

ex1_random_eig.F90

program ex1_random_eig
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial EigenValue
!   Decomposition (EVD) of a symmetric matrix with randomized power subspace iterations
!   using STATPACK.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program shows the use of subroutines QR_CMP and ORTHO_GEN_QR
!   in module QR_Procedures, EIG_CMP in module Eig_procedures, RANDOM_NUMBER_,
!   NORMAL_RANDOM_NUMBER3_ and GEN_RANDOM_SYM_MAT in module Random.
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, random_number_, normal_random_number3_,          &
                         eig_abs_sort, qr_cmp, ortho_gen_qr, eig_cmp, gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION,
! neig0 IS THE RANK OF THE GENERATED MATRIX,
! n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=2000, neig=5, neig0=1000
!
    character(len=*), parameter :: name_proc='Example 1 of random_eig'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, tmp, norma, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, eigvec, v
    real(stnd), dimension(:),   allocatable :: diagr, beta, eigval0, eigval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, i, niter, mat_type
!
    logical(lgl) :: failure, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC
!               MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES  (IN ABSOLUTE MAGNITUDE)
!   mat_type > 3  -> VERY SLOW DECAY OF EIGENVALUES (IN ABSOLUTE MAGNITUDE)
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 6_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p  = 20_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + neig, n )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS. THIS IS NORMALLY NOT NEEDED FOR POSITIVE MATRIX.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
    i = max( np, neig0 )
!
    allocate( a(n,n), q(n,np), qt(np,n), b(n,np), v(np,np), eigvec(n,neig), &
              diagr(np), beta(i), eigval0(neig0), eigval(neig), stat=iok    )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH
!   A SPECIFIED DISTRIBUTION OF EIGENVALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            norma = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!  
!   CHANGE SIGN OF HALF OF THE EIGENVALUES.
!
    call random_number_( beta(:neig0) )
!
    where ( beta(:neig0)>half ) eigval0(:neig0) = -eigval0(:neig0)
!
!   SORT THE EIGENVALUES BY DECREASING ABSOLUTE MAGNITUDE.
!
    call eig_abs_sort( sort, eigval0(:neig0) )
!
!   GENERATE A SYMMETRIC MATRIX a WITH THE SPECIFIED EIGENVALUES
!   AND RANK neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE SYMMETRIC MATRIX.
!
!    norma = norm( a(:n,:n) )
    norma = sqrt(sum( eigval0(:neig0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
!    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, np
!
        call normal_random_number3_( b(:n,i) )
!
    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
    q(:n,:np) = matmul( a(:n,:n), b(:n,:np) )
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
        b(:n,:np) = matmul( a(:n,:n), q(:n,:np)  )
!
        q(:n,:np) = b(:n,:np)
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:n,:np), beta(:np) )
!
!   COMPUTE FINAL RANDOM SUBSPACE PROJECTION.
!
    b(:n,:np) = matmul( a(:n,:n), q(:n,:np) )
!
!   COMPUTE v = q**(t)*b = q**(t)*a*q .
!
    qt(:np,:n) = transpose( q(:n,:np) )
!
    v(:np,:np) = matmul( qt(:np,:n), b(:n,:np) )
!
!   USE A SPECTRAL DECOMPOSITION.
!
    call eig_cmp( v(:np,:np), beta(:np), failure, maxiter=30_i4b )
!
    call eig_abs_sort( sort, beta(:np), v(:np,:np) )
!
!   COMPUTE THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!
    eigvec(:n,:neig) = matmul( q(:n,:np), v(:np,:neig) )
!
!   EXTRACT THE APPROXIMATE TOP neig EIGENVALUES (IN ABSOLUTE MAGNITUDE) OF a .
!
    eigval(:neig) = beta(:neig)
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt( sum( (eigval0(neig+1_i4b:neig0)/norma)**2 ) )
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        q(:n,:neig) = matmul(a,eigvec) - eigvec*spread(eigval(:neig),dim=1,ncopies=n)
        beta(:neig) = norm( q(:n,:neig), dim=2_i4b )
!
        err1 = maxval( beta(:neig) )/( norma*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( q(:neig,:neig) )
!
        v(:neig,:neig) = abs( q(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( v(:neig,:neig) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, q, qt, b, v, diagr, beta, eigval0, eigval, eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_eig
! =============================
!
end program ex1_random_eig

ex1_random_eig_pos.F90

program ex1_random_eig_pos
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate eigenvalue decomposition
!   of a symmetric positive (semi-)definite matrix with randomized subspace iterations and/or
!   the Nystrom method using STATPACK.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program also shows the use of subroutines SVD_CMP in module SVD_Procedures,
!   EIG_CMP in module EIG_Procedures, QR_CMP, ORTHO_GEN_QR in module QR_Procedures,
!   CHOL_CMP in module Lin_procedures, and NORMAL_RANDOM_NUMBER3_ and GEN_RANDOM_SYM_MAT
!   in module Random.
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, normal_random_number3_, chol_cmp,          &
                         eigval_sort, eig_abs_sort, qr_cmp, ortho_gen_qr, svd_cmp, eig_cmp,          &
                         gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION,
! neig0 IS THE RANK OF THE GENERATED MATRIX,
! n IS THE DIMENSION OF THE GENERATED SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=1000, neig=5, neig0=1000
!
    character(len=*), parameter :: name_proc='Example 1 of random_eig_pos'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: d1, err, err1, err2, eps, elapsed_time, norma, tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, v, eigvec
    real(stnd), dimension(:),   allocatable :: diagr, beta, eigval, eigval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, i, j, niter, mat_type
!
    logical(lgl) :: failure, do_test, ortho, use_nystrom
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC POSITIVE
!               SEMI-DEFINITE MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV
!               ITERATIONS AND THE NYSTROM METHOD.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type > 3  -> VERY SLOW DECAY OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 6_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p = 20_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + neig, n )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS. THIS IS NORMALLY NOT NEEDED FOR POSITIVE MATRIX.
!
    ortho = true
!
!   SPECIFY IF LAST STEP OF THE ALGORITHM IS PERFORMED WITH THE NYSTROM METHOD (AND A SVD)
!   OR AN EIGENVALUE DECOMPOSITION.
!
    use_nystrom = true
!
    d1 = one
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), q(n,np), qt(np,n), b(n,np), v(np,np), diagr(np),         &
              beta(np), eigval0(neig0), eigval(neig), eigvec(n,neig), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a WITH A
!   SPECIFIED DISTRIBUTION OF EIGENVALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            norma = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE EIGENVALUES BY DECREASING MAGNITUDE.
!
    call eigval_sort( sort, eigval0(:neig0) )
!
!   GENERATE A SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a WITH THE SPECIFIED EIGENVALUES
!   AND RANK neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX.
!
!    norma = norm( a(:n,:n) )
    norma = sqrt(sum( eigval0(:neig0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
!    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, np
!
        call normal_random_number3_( b(:n,i) )
!
    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
    q(:n,:np) = matmul( a(:n,:n), b(:n,:np) )
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
        b(:n,:np) = matmul( a(:n,:n), q(:n,:np) )
!
        q(:n,:np) = b(:n,:np)
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:n,:np), beta(:np) )
!
!   COMPUTE b = a*q AND v = q**(t)*b .
!
    b(:n,:np) = matmul( a(:n,:n), q(:n,:np) )
!
    qt(:np,:n) = transpose( q(:n,:np) )
!
    v(:np,:np) = matmul( qt(:np,:n), b(:n,:np) )
!
    if ( use_nystrom ) then
!
!       USE THE NYSTROM METHOD.
!
!       SAVE v IF CHOLESKY FACTORIZATION FAILS.
!
        qt(:np,:np) = v(:np,:np)    
!
!       COMPUTE CHOLESKY FACTORIZATION OF v = c**(t)*c .
!
        call chol_cmp( v(:np,:np), diagr(:np), d1 )
!
    else
!
!       USE AN EVD DECOMPOSITION AS A BACKUP.
!
        d1 = zero
!
    end if
!
    if ( d1/=zero ) then
!
!       USE THE NYSTROM METHOD.
!
!       COMPUTE F = b*c**(-1) USING A TRIANGULAR SOLVE.
!
        qt(:np,:n) = transpose( b(:n,:np) )    
!
        do i = 1_i4b, n
!
            do j = 1_i4b, np
!
                qt(j,i) = ( qt(j,i) - dot_product( v(1_i4b:j-1_i4b,j), qt(1_i4b:j-1_i4b,i) ) )*diagr(j)
!
            end do
!
        end do
!
        b(:n,:np) = transpose( qt(:np,:n) )    
!
!       COMPUTE SVD OF THE CHOLESKY FACTOR.
!
        call svd_cmp( b(:n,:np), beta(:np), failure, v=v(:np,:np), sort=sort, max_francis_steps=10_i4b )
!
!       EXTRACT THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!
        eigvec(:n,:neig) = b(:n,:neig)
!
!       COMPUTE THE APPROXIMATE TOP neig EIGENVALUES OF a .
!
        eigval(:neig) = beta(:neig)*beta(:neig)
!
    else
!
        if ( use_nystrom ) then
!
!           RESTORE v AS CHOLESKY FACTORIZATION FAILED.
!
            v(:np,:np) = qt(:np,:np) 
!
        end if
!
!       USE A STANDARD SPECTRAL DECOMPOSITION.
!
        call eig_cmp( v(:np,:np), beta(:np), failure, sort=sort, maxiter=30_i4b )
!
        call eig_abs_sort( sort, beta(:np), v(:np,:np) )
!
!       COMPUTE THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!
        eigvec(:n,:neig) = matmul( q(:n,:np), v(:np,:neig) )
!
!       EXTRACT THE APPROXIMATE TOP neig EIGENVALUES OF a .
!
        eigval(:neig) = beta(:neig)
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt( sum( (eigval0(neig+1_i4b:neig0)/norma)**2 ) )
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        q(:n,:neig) = matmul(a,eigvec) - eigvec*spread(eigval(:neig),dim=1,ncopies=n)
        beta(:neig) = norm( q(:n,:neig), dim=2_i4b )
!
        err1 = maxval( beta(:neig) )/( norma*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( q(:neig,:neig) )
!
        v(:neig,:neig) = abs( q(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( v(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, q, qt, b, v, diagr, beta, eigval0, eigval, eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric positive semi-definite matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_eig_pos
! =================================
!
end program ex1_random_eig_pos

ex1_random_eig_pos_with_blas.F90

program ex1_random_eig_pos_with_blas
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate eigenvalue decomposition
!   of a symmetric positive (semi-)definite matrix with randomized subspace iterations and/or
!   the Nystrom method using STATPACK and BLAS subroutines.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program shows the use of subroutines SVD_CMP in module SVD_Procedures,
!   EIG_CMP in module Eig_Procedures, QR_CMP, ORTHO_GEN_QR in module QR_Procedures,
!   CHOL_CMP in module Lin_procedures, and NORMAL_RANDOM_NUMBER3_ and GEN_RANDOM_SYM_MAT
!   in module Random and gemm, symm, syrk generic interfaces in module BLAS_interfaces.
!                                                                              
! LATEST REVISION : 23/10/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, normal_random_number3_, chol_cmp,          &
                         eigval_sort, eig_abs_sort, qr_cmp, ortho_gen_qr, svd_cmp, eig_cmp,          &
                         gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
#ifdef _BLAS
!    use BLAS_interfaces, only : gemm
    use BLAS_interfaces, only : symm, syrk, gemm
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION,
! neig0 IS THE RANK OF THE GENERATED MATRIX,
! n IS THE DIMENSION OF THE GENERATED SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=2000, neig=5, neig0=1000
!
    character(len=*), parameter :: name_proc='Example 1 of random_eig_pos_with_blas'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: d1, err, err1, err2, eps, elapsed_time, norma, tmp, relerr, relerr2
#ifdef _BLAS
    real(stnd), dimension(:,:), allocatable :: a, q, b, bt, v, eigvec
#else
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, bt, v, eigvec
#endif
    real(stnd), dimension(:),   allocatable :: diagr, beta, eigval, eigval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, i, j, niter, mat_type
!
    logical(lgl) :: failure, do_test, ortho, use_nystrom
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC POSITIVE
!               SEMI-DEFINITE MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV
!               ITERATIONS AND THE NYSTROM METHOD.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type > 3  -> VERY SLOW DECAY OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 6_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p  = 20_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + neig, n )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS. THIS IS NORMALLY NOT NEEDED FOR POSITIVE MATRIX.
!
    ortho = false
!
!   SPECIFY IF LAST STEP OF THE ALGORITHM IS PERFORMED WITH THE NYSTROM METHOD (AND A SVD)
!   OR AN EIGENVALUE DECOMPOSITION.
!
    use_nystrom = true
!
    d1 = one
!
!   ALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    allocate( a(n,n), q(n,np), b(n,np), v(np,np), diagr(np),                    &
              beta(np),  eigval0(neig0), eigval(neig), eigvec(n,neig), stat=iok )
#else
    allocate( a(n,n), q(n,np), qt(np,n), b(n,np), v(np,np), diagr(np),          &
              beta(np),  eigval0(neig0), eigval(neig), eigvec(n,neig), stat=iok )
#endif
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a WITH A
!   SPECIFIED DISTRIBUTION OF EIGENVALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            norma = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE EIGENVALUES BY DECREASING MAGNITUDE.
!
    call eigval_sort( sort, eigval0(:neig0) )
!
!   GENERATE A SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a WITH THE SPECIFIED EIGENVALUES
!   AND RANK neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX.
!
!    norma = norm( a(:n,:n) )
    norma = sqrt(sum( eigval0(:neig0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
!    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, np
!
        call normal_random_number3_( b(:n,i) )
!
    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
#ifdef _BLAS
!    call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:n,1_i4b:np), n  )
    call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:n,1_i4b:np), n  )
#else
    q(:n,:np) = matmul( a(:n,:n), b(:n,:np) )
#endif
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
!        call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!                   q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
        call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
                   q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
#else
        b(:n,:np) = matmul( a(:n,:n), q(:n,:np)  )
#endif
!
        q(:n,:np) = b(:n,:np)
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:n,:np), beta(:np) )
!
!   COMPUTE b = a*q AND v = q**(t)*b .
!
#ifdef _BLAS
!    call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!               q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
    call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
               q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
!
     call gemm( 'T', 'N', np, np, n, one, q(1_i4b:n,1_i4b:np), n,       &
                b(1_i4b:n,1_i4b:np), n, zero, v(1_i4b:np,1_i4b:np), np  )
#else
    b(:n,:np) = matmul( a(:n,:n), q(:n,:np)  )
!
    qt(:np,:n) = transpose( q(:n,:np) )
!
    v(:np,:np) = matmul( qt(:np,:n), b(:n,:np) )
#endif
!
    if ( use_nystrom ) then
!
!       USE THE NYSTROM METHOD.
!
!       ALLOCATE WORK ARRAY.
!
        allocate( bt(np,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE v IF CHOLESKY FACTORIZATION FAILS.
!
        bt(:np,:np) = v(:np,:np)   
!
!       COMPUTE CHOLESKY FACTORIZATION OF v = c**(t)*c .
!
        call chol_cmp( v(:np,:np), diagr(:np), d1 )
!
    else
!
!       USE AN EVD DECOMPOSITION AS A BACKUP.
!
        d1 = zero
!
    end if
!
    if ( d1/=zero ) then
!
!       USE THE NYSTROM METHOD.
!
!       COMPUTE F = b*c**(-1) USING A TRIANGULAR SOLVE.
!
        bt(:np,:n) = transpose( b(:n,:np) )    
!
        do i = 1_i4b, n
!
            do j = 1_i4b, np
!
                bt(j,i) = ( bt(j,i) - dot_product( v(1_i4b:j-1_i4b,j), bt(1_i4b:j-1_i4b,i) ) )*diagr(j)
!
            end do
!
        end do
!
        b(:n,:np) = transpose( bt(:np,:n) )    
!
!       COMPUTE SVD OF THE CHOLESKY FACTOR.
!
        call svd_cmp( b(:n,:np), beta(:np), failure, v=v(:np,:np), sort=sort, max_francis_steps=10_i4b )
!
!       EXTRACT THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!
        eigvec(:n,:neig) = b(:n,:neig)
!
!       COMPUTE THE APPROXIMATE TOP neig EIGENVALUES OF a .
!
        eigval(:neig) = beta(:neig)*beta(:neig)
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( bt )
!
    else
!
        if ( use_nystrom ) then
!
!           RESTORE v AS CHOLESKY FACTORIZATION FAILED.
!
            v(:np,:np) = bt(:np,:np)    
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( bt )
!
        end if
!
!       USE A STANDARD SPECTRAL DECOMPOSITION.
!
        call eig_cmp( v(:np,:np), beta(:np), failure, sort=sort, maxiter=30_i4b )
!
        call eig_abs_sort( sort, beta(:np), v(:np,:np) )
!
!       COMPUTE THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!!
#ifdef _BLAS
        call gemm( 'N', 'N', n, neig, np, one, q(1_i4b:n,1_i4b:np), n,              &
                   v(1_i4b:np,1_i4b:neig), np, zero, eigvec(1_i4b:n,1_i4b:neig), n  )
#else
        eigvec(:n,:neig) = matmul( q(:n,:np), v(:np,:neig) )
#endif
!
!       EXTRACT THE APPROXIMATE TOP neig EIGENVALUES OF a .
!
        eigval(:neig) = beta(:neig)
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt( sum( (eigval0(neig+1_i4b:neig0)/norma)**2 ) )
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        q(:n,:neig) = eigvec(:n,:neig)*spread(eigval(:neig),dim=1,ncopies=n)
!
#ifdef _BLAS
!        call gemm( 'N', 'N', n, neig, n, one, a(1_i4b:n,1_i4b:n), n,              &
!                   eigvec(1_i4b:n,1_i4b:neig), n, -one, q(1_i4b:n,1_i4b:neig), n  )
       call symm( 'L', 'U', n, neig, one, a(1_i4b:n,1_i4b:n), n,                  &
                   eigvec(1_i4b:n,1_i4b:neig), n, -one, q(1_i4b:n,1_i4b:neig), n  )
#else
        q(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)  ) - q(:n,:neig)
#endif
!
        beta(:neig) = norm( q(:n,:neig), dim=2_i4b )
!
        err1 = maxval( beta(:neig) )/( norma*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( q(:neig,:neig) )
!
        v(:neig,:neig) = abs( q(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( v(:neig,:neig) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    deallocate( a, q, b, v, diagr, beta, eigval0, eigval, eigvec )
#else
    deallocate( a, q, qt, b, v, diagr, beta, eigval0, eigval, eigvec )
#endif
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric positive semi-definite matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_eig_pos_with_blas
! ===========================================
!
end program ex1_random_eig_pos_with_blas

ex1_random_eig_with_blas.F90

program ex1_random_eig_with_blas
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial EigenValue
!   Decomposition (EVD) of a symmetric matrix with randomized power subspace iterations
!   using STATPACK and BLAS subroutines.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program also shows the use of subroutines QR_CMP, ORTHO_GEN_QR in module QR_Procedures,
!   EIG_CMP in module Eig_procedures, NORMAL_RANDOM_NUMBER3_ and GEN_RANDOM_SYM_MAT in module
!   Random, and gemm, symm, syrk generic interfaces in module BLAS_interfaces.
!                                                                              
! LATEST REVISION : 23/10/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, random_number_, normal_random_number3_,          &
                         eig_abs_sort, qr_cmp, ortho_gen_qr, eig_cmp, gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
#ifdef _BLAS
!    use BLAS_interfaces, only : gemm
    use BLAS_interfaces, only : symm, syrk, gemm
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION,
! neig0 IS THE RANK OF THE GENERATED MATRIX,
! n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=2000, neig=5, neig0=1000
!
    character(len=*), parameter :: name_proc='Example 1 of random_eig_with_blas'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, tmp, norma, relerr, relerr2
#ifdef _BLAS
    real(stnd), dimension(:,:), allocatable :: a, q, b, eigvec, v
#else
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, eigvec, v
#endif
    real(stnd), dimension(:),   allocatable :: diagr, beta, eigval0, eigval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, i, niter, mat_type
!
    logical(lgl) :: failure, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC
!               MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES  (IN ABSOLUTE MAGNITUDE)
!   mat_type > 3  -> VERY SLOW DECAY OF EIGENVALUES (IN ABSOLUTE MAGNITUDE)
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 6_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p  = 20_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + neig, n )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS. THIS IS NORMALLY NOT NEEDED FOR POSITIVE MATRIX.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
    i = max( np, neig0 )
!
#ifdef _BLAS
    allocate( a(n,n), q(n,np), b(n,np), v(np,np), eigvec(n,neig),         &
              diagr(np), beta(i), eigval0(neig0), eigval(neig), stat=iok  )
#else
    allocate( a(n,n), q(n,np), qt(np,n), b(n,np), v(np,np), eigvec(n,neig), &
              diagr(np), beta(i), eigval0(neig0), eigval(neig), stat=iok    )
#endif
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH
!   A SPECIFIED DISTRIBUTION OF EIGENVALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            norma = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!  
!   CHANGE SIGN OF HALF OF THE EIGENVALUES.
!
    call random_number_( beta(:neig0) )
!
    where ( beta(:neig0)>half ) eigval0(:neig0) = -eigval0(:neig0)
!
!   SORT THE EIGENVALUES BY DECREASING ABSOLUTE MAGNITUDE.
!
    call eig_abs_sort( sort, eigval0(:neig0) )
!
!   GENERATE A SYMMETRIC MATRIX a WITH THE SPECIFIED EIGENVALUES
!   AND RANK neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE SYMMETRIC MATRIX.
!
!    norma = norm( a(:n,:n) )
    norma = sqrt(sum( eigval0(:neig0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
!    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
    do i = 1_i4b, np
!
        call normal_random_number3_( b(:n,i) )
!
    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
#ifdef _BLAS
!    call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:n,1_i4b:np), n  )
    call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:n,1_i4b:np), n  )
#else
    q(:n,:np) = matmul( a(:n,:n), b(:n,:np) )
#endif
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
!        call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!                   q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
        call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
                   q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
#else
        b(:n,:np) = matmul( a(:n,:n), q(:n,:np)  )
#endif
!
        q(:n,:np) = b(:n,:np)
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM SAMPLE MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:n,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM SAMPLE MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:n,:np), beta(:np) )
!
!   COMPUTE FINAL RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
!    call gemm( 'N', 'N', n, np, n, one, a(1_i4b:n,1_i4b:n), n,       &
!               q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
    call symm( 'L', 'U', n, np, one, a(1_i4b:n,1_i4b:n), n,          &
               q(1_i4b:n,1_i4b:np), n, zero, b(1_i4b:n,1_i4b:np), n  )
!
!   COMPUTE v = q**(t)*b = q**(t)*a*q .
!
    call gemm( 'T', 'N', np, np, n, one, q(1_i4b:n,1_i4b:np), n,       &
               b(1_i4b:n,1_i4b:np), n, zero, v(1_i4b:np,1_i4b:np), np  )
#else
    b(:n,:np) = matmul( a(:n,:n), q(:n,:np)  )
!
!   COMPUTE v = q**(t)*b = q**(t)*a*q .
!
    qt(:np,:n) = transpose( q(:n,:np) )
!
    v(:np,:np) = matmul( qt(:np,:n), b(:n,:np) )
#endif
!
!   USE A SPECTRAL DECOMPOSITION.
!
    call eig_cmp( v(:np,:np), beta(:np), failure, maxiter=30_i4b )
!
    call eig_abs_sort( sort, beta(:np), v(:np,:np) )
!
!   COMPUTE THE APPROXIMATE TOP neig EIGENVECTORS OF a .
!
#ifdef _BLAS
    call gemm( 'N', 'N', n, neig, np, one, q(1_i4b:n,1_i4b:np), n,              &
               v(1_i4b:np,1_i4b:neig), np, zero, eigvec(1_i4b:n,1_i4b:neig), n  )
#else
    eigvec(:n,:neig) = matmul( q(:n,:np), v(:np,:neig) )
#endif
!
!   EXTRACT THE APPROXIMATE TOP neig EIGENVALUES (IN ABSOLUTE MAGNITUDE) OF a .
!
    eigval(:neig) = beta(:neig)
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt( sum( (eigval0(neig+1_i4b:neig0)/norma)**2 ) )
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        q(:n,:neig) = eigvec(:n,:neig)*spread(eigval(:neig),dim=1,ncopies=n)
!
#ifdef _BLAS
!        call gemm( 'N', 'N', n, neig, n, one, a(1_i4b:n,1_i4b:n), n,              &
!                   eigvec(1_i4b:n,1_i4b:neig), n, -one, q(1_i4b:n,1_i4b:neig), n  )
       call symm( 'L', 'U', n, neig, one, a(1_i4b:n,1_i4b:n), n,                  &
                   eigvec(1_i4b:n,1_i4b:neig), n, -one, q(1_i4b:n,1_i4b:neig), n  )
#else
        q(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)  ) - q(:n,:neig)
#endif
!
        beta(:neig) = norm( q(:n,:neig), dim=2_i4b )
!
        err1 = maxval( beta(:neig) )/( norma*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( q(:neig,:neig) )
!
        v(:neig,:neig) = abs( q(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( v(:neig,:neig) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    deallocate( a, q, b, v, diagr, beta, eigval0, eigval, eigvec )
#else
    deallocate( a, q, qt, b, v, diagr, beta, eigval0, eigval, eigvec )
#endif
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_eig_with_blas
! =======================================
!
end program ex1_random_eig_with_blas

ex1_random_number.F90

program ex1_random_number
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of intrinsic subroutine RANDOM_NUMBER
!   for generating arrays of real random numbers following a Uniform distribution.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd,  merror, allocate_error
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n1=10**4, n2=10**4
!
    character(len=*), parameter :: name_proc='Example 1 of intrinsic random_number'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time
    real(stnd), allocatable, dimension(:,:) :: real_mat
!
    integer(i4b) :: i, j
    integer      :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   ALLOCATE WORK ARRAY.
!
    allocate( real_mat(n1,n2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   RESET THE SEEDS USED BY THE INTRINSIC UNIFORM RANDOM GENERATOR.
!
    call random_seed()
!
    call system_clock( count_rate=irate, count_max=imax  )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING
!   SCALAR FORM OF INTRINSIC SUBROUTINE random_number.
!
    do i = 1_i4b, n2
!
        do j = 1_i4b, n1
!
             call random_number( real_mat(j,i) )
!
        end do
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with scalar form of intrinsic subroutine random_number is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE random_number.
!
    do i = 1_i4b, n2
!
        call random_number( real_mat(:n1,i) )
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with vector form of intrinsic subroutine random_number is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE random_number.
!
    call random_number( real_mat(:n1,:n2) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with matrix form of intrinsic subroutine random_number is',  &
      elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( real_mat )
!
!
! END OF PROGRAM ex1_random_number
! ================================
!
end program ex1_random_number

ex1_random_number_.F90

program ex1_random_number_
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine RANDOM_NUMBER_ and
!   function RAND_NUMBER in module Random in module Random for generating arrays of
!   real random numbers following a Uniform distribution.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd,  merror, allocate_error, random_seed_,  &
                         rand_number, random_number_
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n1 AND n2 ARE THE DIMENSIONS OF THE GENERATED ARRAY.
!
    integer(i4b), parameter :: prtunit=6, n1=10**4, n2=10**4
!
    character(len=*), parameter :: name_proc='Example 1 of random_number_'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time
    real(stnd), allocatable, dimension(:,:) :: real_mat
!
    integer(i4b) :: i, j
    integer      :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   ALLOCATE WORK ARRAY.
!
    allocate( real_mat(n1,n2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!   alg CAN BE CHOOSEN BETWEEN 1 AND 10.
!
    call random_seed_( alg=4 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
    call system_clock( count_rate=irate, count_max=imax  )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING FUNCTION rand_number().
!
    do i = 1_i4b, n2
!
        do j = 1_i4b, n1
!
             real_mat(j,i) = rand_number( )
!
        end do
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with function rand_number() is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE random_number_.
!
    do i = 1_i4b, n2
!
        call random_number_( real_mat(:n1,i) )
!
    end do
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with vector form of subroutine random_number_ is',  &
      elapsed_time, ' seconds'
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart )
!
!   GENERATE AN UNIFORM RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE random_number_.
!
    call random_number_( real_mat(:n1,:n2) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    write (prtunit,*) 
    write (*,'(a,i12,a,0pd12.4,a)')              &
      'The elapsed time for generating ', n1*n2, &
      ' random uniform real numbers with matrix form of subroutine random_number_ is',  &
      elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAY.
!
    deallocate( real_mat )
!
!
! END OF PROGRAM ex1_random_number_
! =================================
!
end program ex1_random_number_

ex1_random_svd.F90

program ex1_random_svd
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with randomized
!   subspace iterations using STATPACK.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program shows the use of subroutines SVD_CMP in module SVD_Procedures,
!   QR_CMP and ORTHO_GEN_QR in module QR_Procedures, NORMAL_RANDOM_NUMBER3_ and
!   GEN_RANDOM_MAT in module Random.
!                                                                              
! LATEST REVISION : 23/10/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, normal_random_number3_, singval_sort,      &
                         qr_cmp, ortho_gen_qr, svd_cmp, gen_random_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE RANK OF THE GENERATED MATRIX,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT
!
    integer(i4b), parameter :: prtunit=6, m=2000, n=1000, mn=min(m,n), nsvd0=1000, nsvd=5
!
    character(len=*), parameter :: name_proc='Example 1 of random_svd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, norma, tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, bt, leftvec, rightvec
    real(stnd), dimension(:),   allocatable :: diagr, beta, singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, niter, i, mat_type
!
    logical(lgl) :: failure, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A m-BY-n REAL MATRIX USING RANDOMIZED
!               POWER SUBSPACE ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 5_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p  = 10_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + nsvd, mn )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), q(m,np), qt(np,m), b(n,np), bt(np,n),        &
              diagr(np), beta(np), singval0(nsvd0), singval(nsvd), &
              leftvec(m,nsvd), rightvec(n,nsvd),  stat=iok  )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
!    do i = 1_i4b, np
!
!        call normal_random_number3_( b(:n,i) )
!
!    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
    q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:m,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:m,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
        qt(:np,:m) = transpose( q(:m,:np) )
!
        bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
!
        b(:n,:np) = transpose( bt(:np,:n) )
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( b(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( b(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
        q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:m,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:m,:np), beta(:np) )
!
    qt(:np,:m) = transpose( q(:m,:np) )
!
    bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
!
!   COMPUTE SVD OF THE FINAL RANDOM MATRIX PROJECTION.
!
    call svd_cmp( bt(:np,:n), beta(:np), failure, v=b(:n,:np), sort=sort, max_francis_steps=10_i4b )
!
!   COMPUTE THE APPROXIMATE TOP nsvd LEFT SINGULAR VECTORS OF a .
!
    leftvec(:m,:nsvd) = matmul( q(:m,:np), bt(:np,:nsvd) )
!
!   EXTRACT THE APPROXIMATE TOP nsvd RIGHT SINGULAR VECTORS OF a .
!
    rightvec(:n,:nsvd) = b(:n,:nsvd)
!
!   EXTRACT THE APPROXIMATE TOP nsvd SINGULAR VALUES OF a .
!
    singval(:nsvd) = beta(:nsvd)
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt(sum( (singval0(nsvd+1_i4b:nsvd0)/norma)**2 ))
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsvd) - u(:n,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        q(:m,:nsvd) = matmul(a,rightvec) - leftvec*spread(singval(:nsvd),dim=1,ncopies=m)
        beta(:nsvd) = norm( q(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( beta(:nsvd) )/( norma*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsvd)**(t)*u(:n,:nsvd).
!
        call unit_matrix( q(:nsvd,:nsvd) )
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( b(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsvd)**(t)*v(:m,:nsvd).
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( b(:nsvd,:nsvd) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, q, qt, b, bt, diagr, beta, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_svd
! =============================
!
end program ex1_random_svd

ex1_random_svd_fixed_precision_with_blas.F90

program ex1_random_svd_fixed_precision_with_blas
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate reduced SVD with randomized
!   subspace iterations, which fullfills a given relative error in Frobenius norm using
!   STATPACK and BLAS subroutines.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program shows the use of subroutines SVD_CMP in module SVD_Procedures,
!   QR_CMP and ORTHO_GEN_QR in module QR_Procedures, NORMAL_RANDOM_NUMBER3_ and
!   GEN_RANDOM_MAT in module Random and GEMM generic interface in module BLAS_interfaces.
!    
!                                                                              
! LATEST REVISION : 23/10/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, three, seven, c30, allocate_error,  &
                         merror, norm, unit_matrix, random_seed_, normal_random_number3_, triang_solve,   &
                         singval_sort, qr_cmp, ortho_gen_qr, svd_cmp, gen_random_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
#ifdef _BLAS
    use BLAS_interfaces, only : gemm
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE RANK OF THE GENERATED MATRIX,
! relerr0 IS THE REQUESTED RELATIVE ERROR OF THE RESTRICTED SVD IN FROBENIUS NORM.
!
    integer(i4b), parameter :: prtunit=6, m=2000, n=2000, mn=min(m,n), nsvd0=1000
!   
    real(stnd), parameter  :: relerr0=0.10_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of random_svd_fixed_precision_with_blas'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, threshold, &
                  norma, erra, relerr, relerr2, tmp
#ifdef _BLAS
    real(stnd), dimension(:,:), allocatable :: a, q, b, h, r, r2, ti, ti2, yi, bi, v, leftvec, rightvec
#else
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, bt, r, r2, ti, ti2, yi, bi, leftvec, rightvec
#endif
    real(stnd), dimension(:),   allocatable :: diagr, beta, s, s0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: np, blksz, niter, maxiter, i, i0, i1, i2, j, nsvd, mat_type
!
    logical(lgl) :: failure_qb, failure_svd, do_test, ortho, reortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A m-BY-n REAL MATRIX USING RANDOMIZED
!               POWER SUBSPACE ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET THE TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE QB ALGORITHM.
!   blksz*maxiter IS THE MAXIMUM ALLOWABLE RANK OF THE
!   PARTIAL SVD, WHICH IS SOUGHT.
!
    blksz   = 10_i4b
    maxiter = 20_i4b
!
    tmp = real( mn, stnd )/(three*real( blksz, stnd ))
    maxiter = min( maxiter, int( tmp, i4b ) )
!
    np = blksz*maxiter
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 1_i4b
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS.
!
    ortho = true
!
!   SPECIFY IF REORTHONORMALIZATION IS CARRIED OUT TO AVOID LOSS OF ORTHOGONALITY
!   IN THE BLOCK GRAM-SCHMIDT ORTHOGONALISATION SCHEME USED TO BUILD THE ORTHOGONAL MATRIX
!   OF THE QB DECOMPOSITION OF THE INPUT MATRIX.
!
    reortho = false
!
!   ALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    allocate( a(m,n), q(m,np), b(n,np), diagr(mn), beta(mn), s0(nsvd0),  &
              h(n,np), r(blksz,blksz), yi(m,blksz), ti(np,blksz),        &
              bi(blksz,n),  stat=iok     )
#else
    allocate( a(m,n), q(m,np), qt(np,m), b(n,np), bt(np,n), diagr(mn),        &
              beta(mn), s0(nsvd0), r(blksz,blksz), yi(m,blksz), ti(np,blksz), &
              bi(blksz,n),  stat=iok      )
#endif
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
    if ( reortho ) then
!
        allocate( r2(blksz,blksz), ti2(np,blksz), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                s0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, s0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( s0(:nsvd0), a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TEST VALIDITY OF THE PRESET RELATIVE ACCURACY TOLERANCE relerr .
!
    tmp = two*sqrt( epsilon( relerr0 )/relerr0 )
    relerr = max( tmp, relerr0 )
!
!   COMPUTE THE FROBENIUS NORM OF THE INPUT MATRIX AND THE REQUIRED THRESHOLD.
!
!    norma = norm( a(1_i4b:m,1_i4b:n) )
    norma = sqrt(sum( s0(:nsvd0)**2 ) )
!
    erra = norma*norma
!
    threshold = erra*(relerr*relerr)
!
!        write (prtunit,*) erra, threshold
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
!    do i = 1_i4b, np
!
!        call normal_random_number3_( b(:n,i) )
!
!    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
#ifdef _BLAS
    call gemm( 'N', 'N', m, np, n, one, a(1_i4b:m,1_i4b:n), m,       &
               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:m,1_i4b:np), m  )
#else
    q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
#endif
!
!   DO SUBSPACE ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:m,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:m,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
        call gemm( 'T', 'N', n, np, m, one, a(1_i4b:m,1_i4b:n), m,       &
                   q(1_i4b:m,1_i4b:np), m, zero, b(1_i4b:n,1_i4b:np), n  )
#else
        qt(:np,:m) = transpose( q(:m,:np) )
!
        bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
!
        b(:n,:np) = transpose( bt(:np,:n) )
#endif
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( b(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( b(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
        call gemm( 'N', 'N', m, np, n, one, a(1_i4b:m,1_i4b:n), m,       &
                   b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:m,1_i4b:np), m  )
#else
        q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
#endif
!
    end do
!
#ifdef _BLAS
    call gemm( 'T', 'N', n, np, m, one, a(1_i4b:m,1_i4b:n), m,       &
               q(1_i4b:m,1_i4b:np), m, zero, h(1_i4b:n,1_i4b:np), n  )
#else
    qt(:np,:m) = transpose( q(:m,:np) )
!
    bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
#endif
!
    i0   = 0_i4b
    nsvd = 0_i4b
!
    r(:blksz,:blksz)         = zero
    ti(1_i4b:np,1_i4b:blksz) = zero
!
    if ( reortho ) then
!
        r2(:blksz,:blksz) = zero
        ti2(1_i4b:np,1_i4b:blksz) = zero
!
    end if
!
    failure_qb = true
!
!   COMPUTE THE QB FACTORIZATION.
!
    iter : do i = 1_i4b, maxiter
!
        i1 = i0 + 1_i4b
        i2 = i0 + blksz
!
        if ( i==1_i4b ) then
!
            yi(:m,:blksz) = q(:m,i1:i2)
!
        else
!
!           PERFORM BLOCK GRAM-SCHMIDT ORTHOGONALISATION STEP.
!
#ifdef _BLAS
            call gemm( 'T', 'N', i0, blksz, n, one, b(1_i4b:n,1_i4b:i0), n,     &
                       b(1_i4b:n,i1:i2), n, zero, ti(1_i4b:np,1_i4b:blksz), np  )
!
            call gemm( 'N', 'N', m, blksz, i0, -one, q(1_i4b:m,1_i4b:i0), m,   &
                       ti(1_i4b:np,1_i4b:blksz), np, one, q(1_i4b:m,i1:i2), m  )
#else
            ti(:i0,:blksz) = matmul( bt(:i0,:n), b(:n,i1:i2) )
!
            q(:m,i1:i2) = q(:m,i1:i2) - matmul( q(:m,:i0), ti(:i0,:blksz) )
#endif
!            
            yi(:m,:blksz) = q(:m,i1:i2)
!
        end if
!
        call qr_cmp( q(:m,i1:i2), diagr(:blksz), beta(:blksz) )
!
!       SAVE UPPER TRIANGULAR MATRIX FOR LATER USE.
!
        if ( i==1_i4b .or. .not.reortho ) then
!
            do j = 1, blksz
!
                r(1_i4b:j-1_i4b,j) = q(1_i4b:j-1_i4b,i0+j)
                r(j,j)             = diagr(j)
!              r(j+1_i4b:blksz,j) = zero
!
            end do
!
        else
!
            do j = 1, blksz
!
                r2(1_i4b:j-1_i4b,j) = q(1_i4b:j-1_i4b,i0+j)
                r2(j,j)             = diagr(j)
!              r2(j+1_i4b:blksz,j) = zero
!
            end do
!
        end if
!
!       GENERATE COLUMNS i1 TO i2 OF THE ORTHOGONAL MATRIX.
!
        call ortho_gen_qr( q(:m,i1:i2), beta(:blksz) )
!
!       SAVE UPPER TRIANGULAR MATRIX FOR LATER USE.
!
        if ( i/=1_i4b ) then
!
            if ( reortho ) then
!
!               REORTHONORMALIZATION IS CARRIED OUT TO AVOID LOSS OF ORTHOGONALITY
!               IN THE BLOCK GRAM-SCHMIDT ORTHOGONALISATION SCHEME USED TO BUILD
!               THE ORTHOGONAL MATRIX OF THE QB DECOMPOSITION.
!
#ifdef _BLAS
                call gemm( 'T', 'N', i0, blksz, m, one, q(1_i4b:m,1_i4b:i0), m,     &
                           q(1_i4b:m,i1:i2), m, zero, ti2(1_i4b:np,1_i4b:blksz), np )
!
                call gemm( 'N', 'N', m, blksz, i0, -one, q(1_i4b:m,1_i4b:i0), m,    &
                           ti2(1_i4b:np,1_i4b:blksz), np, one, q(1_i4b:m,i1:i2), m  )
#else
                ti2(:i0,:blksz) = matmul( qt(:i0,:m), q(:m,i1:i2) )
!
                q(:m,i1:i2) = q(:m,i1:i2) - matmul( q(:m,:i0), ti2(:i0,:blksz) )
#endif
!
!               IMPROVE ESTIMATES OF COLUMNS i1 TO i2 OF THE ORTHOGONAL MATRIX.
!
                call qr_cmp( q(:m,i1:i2), diagr(:blksz), beta(:blksz) )
!
!               SAVE UPPER TRIANGULAR MATRIX FOR LATER USE.
!
                do j = 1, blksz
!
                    bi(1_i4b:j-1_i4b,j) = q(1_i4b:j-1_i4b,i0+j)
                    bi(j,j)             = diagr(j)
                    bi(j+1_i4b:blksz,j) = zero
!
                end do
!
!               UPDATE COLUMNS i1 TO i2 OF THE ORTHOGONAL MATRIX.
!
                call ortho_gen_qr( q(:m,i1:i2), beta(:blksz) )
!
#ifdef _BLAS
                call gemm( 'N', 'N', blksz, blksz, blksz, one, bi(1_i4b:blksz,1_i4b:blksz), blksz,     &
                           r2(1_i4b:blksz,1_i4b:blksz), blksz, zero, r(1_i4b:blksz,1_i4b:blksz), blksz )
!
                call gemm( 'T', 'N', i0, blksz, m, one, q(1_i4b:m,1_i4b:i0), m,          &
                           yi(1_i4b:m,1_i4b:blksz), m, one, ti(1_i4b:np,1_i4b:blksz), np )
#else
                r(:blksz,:blksz) = matmul( bi(:blksz,:blksz), r2(:blksz,:blksz) )
!
                ti(:i0,:blksz) = ti(:i0,:blksz) + matmul( qt(:i0,:m), yi(:m,:blksz) )
#endif
!
            end if
!
#ifdef _BLAS
!
            call gemm( 'N', 'N', n, blksz, i0, -one, b(1_i4b:n,1_i4b:i0), n,  &
                       ti(1_i4b:np,1_i4b:blksz), np, one, h(1_i4b:n,i1:i2), n )
#else
            bi(:blksz,:i0) = transpose( ti(:i0,:blksz) )
            bt(i1:i2,:n)   = bt(i1:i2,:n) - matmul( bi(:blksz,:i0), bt(:i0,:n) )
#endif
!
        end if
!
!       COMPUTE ROWS i1 TO i2 OF THE B FACTOR OF QB DECOMPOSITION.
!
#ifdef _BLAS
        bi(:blksz,:n) = transpose( h(:n,i1:i2) )
!
        call triang_solve( r(:blksz,:blksz), bi(:blksz,:n), upper=true, trans=true )
!
        b(:n,i1:i2)  = transpose( bi(:blksz,:n) )
!
#else
        bi(:blksz,:n) = bt(i1:i2,:n)
!
        call triang_solve( r(:blksz,:blksz), bi(:blksz,:n), upper=true, trans=true )
!
        bt(i1:i2,:n) = bi(:blksz,:n)
        qt(i1:i2,:m) = transpose( q(:m,i1:i2) )
#endif
!
!       COMPUTE CURRENT ERROR OF THE QB FACTORIZATION.
!
        tmp = norm( bi(:blksz,:n) )
        tmp = erra - tmp*tmp
!
!       CHECK IF THE THRESHOLD IS SATISFIED WITH INCLUSION OF THE CURRENT BLOCK.
!
        if ( tmp<threshold  ) then
!
            failure_qb = false
!
!           DETREMINE THE PRECISE RANK OF THE ORTHOGONAL MATRIX OF THE QB DECOMPOSITION.
!
            do j = 1, blksz
!
#ifdef _BLAS
                tmp = norm( b(:n,i0+j) )
#else
                tmp = norm( bi(j,:n) )
#endif
                erra = erra - tmp*tmp
!
                if ( erra<threshold  ) then
!
!                   nsvd IS THE NUMBER OF COLUMNS OF THE ORTHOGONAL MATRIX OF THE QB DECOMPOSITION.
!
                    nsvd = i0 + j
!
                    exit iter
!
                end if
!
            end do
!
        else
!
            erra = tmp
!
        end if
!
!        write (prtunit,*) erra, threshold
!
        i0 = i0 + blksz
!
    end do iter
!
    if ( failure_qb ) then
!
        nsvd = np
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    deallocate( h, r, yi, ti, bi, diagr )
#else
    deallocate( qt, r, yi, ti, bi, diagr )
#endif
!
    if ( reortho ) then
!
        deallocate(  r2, ti2 )
!
    end if
!
!   ALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    allocate( v(nsvd,nsvd), s(nsvd), leftvec(m,nsvd), rightvec(n,nsvd), stat=iok )
#else
    allocate( s(nsvd), leftvec(m,nsvd), rightvec(n,nsvd), stat=iok )
#endif
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   COMPUTE SVD OF THE FINAL QB FACTORIZATION.
!
#ifdef _BLAS
    call svd_cmp( b(:n,:nsvd), s(:nsvd), failure_svd, v=v(:nsvd,:nsvd), sort=sort, max_francis_steps=10_i4b )
!
!   COMPUTE THE APPROXIMATE TOP nsvd LEFT SINGULAR VECTORS OF a .
!
    call gemm( 'N', 'N', m, nsvd, nsvd, one, q(1_i4b:m,1_i4b:nsvd), m,               &
               v(1_i4b:nsvd,1_i4b:nsvd), nsvd, zero, leftvec(1_i4b:m,1_i4b:nsvd), m  )
!
!   EXTRACT THE APPROXIMATE TOP nsvd RIGHT SINGULAR VECTORS OF a .
!
    rightvec(:n,:nsvd) = b(:n,:nsvd)
#else
    call svd_cmp( bt(:nsvd,:n), s(:nsvd), failure_svd, v=rightvec(:n,:nsvd), sort=sort, max_francis_steps=10_i4b )
!
!   COMPUTE THE APPROXIMATE TOP nsvd LEFT SINGULAR VECTORS OF a .
!
    leftvec(:m,:nsvd) = matmul( q(:m,:nsvd), bt(:nsvd,:nsvd) )
#endif
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    relerr = sqrt( erra )/norma
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt(sum( (s0(nsvd+1_i4b:nsvd0)/norma)**2 ))
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsvd) - u(:n,:nsvd)*diag(s(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        q(:m,:nsvd) = leftvec(:m,:nsvd)*spread( s(:nsvd), dim=1, ncopies=m)
!
#ifdef _BLAS
        call gemm( 'N', 'N', m, nsvd, n, one, a(1_i4b:m,1_i4b:n), m,                &
                   rightvec(1_i4b:n,1_i4b:nsvd), n, -one, q(1_i4b:m,1_i4b:nsvd), m  )
#else
        q(:m,:nsvd) = matmul( a(:m,:n), rightvec(:n,:nsvd) ) - q(:m,:nsvd)
#endif
!
        beta(:nsvd) = norm( q(:m,:nsvd), dim=2_i4b )
!
        if ( norma==zero ) then
            norma = one
        end if
!
        err1 = maxval( beta(:nsvd) )/( norma*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsvd)**(t)*u(:n,:nsvd).
!
        call unit_matrix( q(:nsvd,:nsvd) )
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( b(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsvd)**(t)*v(:m,:nsvd).
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( b(:nsvd,:nsvd) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    deallocate( a, q, b, v, leftvec, rightvec, beta, s, s0 )
#else
    deallocate( a, q, b, bt, leftvec, rightvec, beta, s, s0 )
#endif
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<eps .and. .not.failure_svd ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( failure_qb ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Fail to converge within ', maxiter,   &
                          ' iterations! ||A-rSVD||_F / ||A||_F = ', relerr, ' >= ', relerr0
        write (prtunit,*) 
!
    else
!
        write (prtunit,*) 
        write (prtunit,*) 'Converge with ', i ,' iterations! ||A-rSVD||_F / ||A||_F = ', &
                          relerr, ' < ', relerr0
        write (prtunit,*) 
!
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_svd_fixed_precision_with_blas
! =======================================================
!
end program ex1_random_svd_fixed_precision_with_blas

ex1_random_svd_with_blas.F90

program ex1_random_svd_with_blas
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with randomized
!   subspace iterations using STATPACK and BLAS subroutines.
!                                                                              
!                                                     
! Further Details
! ===============
!
!   The program shows the use of subroutines SVD_CMP in module SVD_Procedures,
!   QR_CMP and ORTHO_GEN_QR in module QR_Procedures, NORMAL_RANDOM_NUMBER3_ and
!   GEN_RANDOM_MAT in module Random and gemm generic interface in module BLAS_interfaces.
!    
!                                                                              
! LATEST REVISION : 23/10/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, merror, allocate_error, &
                         norm, unit_matrix, random_seed_, normal_random_number3_, singval_sort,      &
                         qr_cmp, ortho_gen_qr, svd_cmp, gen_random_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
#ifdef _BLAS
    use BLAS_interfaces, only : gemm
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE RANK OF THE GENERATED MATRIX,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT
!
    integer(i4b), parameter :: prtunit=6, m=2000, n=2000, mn=min(m,n), nsvd0=1000, nsvd=10
!
    character(len=*), parameter :: name_proc='Example 1 of random_svd_with_blas'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, norma, tmp, relerr, relerr2
#ifdef _BLAS
    real(stnd), dimension(:,:), allocatable :: a, q, b, v, leftvec, rightvec
#else
    real(stnd), dimension(:,:), allocatable :: a, q, qt, b, bt, leftvec, rightvec
#endif
    real(stnd), dimension(:),   allocatable :: diagr, beta, singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: p, np, niter, i, mat_type
!
    logical(lgl) :: failure, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A m-BY-n REAL MATRIX USING RANDOMIZED
!               POWER SUBSPACE ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER ITERATIONS niter TO BE PERFORMED.
!
    niter = 4_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE p .
!
    p  = 10_i4b
!
!   CHECK VALIDITY OF THE OVERSAMPLING SIZE PARAMETER.
!
    np = min( p + nsvd, mn )
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    allocate( a(m,n), q(m,np), b(n,np), v(np,np),                        &
              diagr(nsvd0), beta(nsvd0), singval0(nsvd0), singval(nsvd), &
              leftvec(m,nsvd), rightvec(n,nsvd),  stat=iok )
#else
    allocate( a(m,n), q(m,np), qt(np,m), b(n,np), bt(np,n),              &
              diagr(nsvd0), beta(nsvd0), singval0(nsvd0), singval(nsvd), &
              leftvec(m,nsvd), rightvec(n,nsvd),  stat=iok )
#endif
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A GEOMETRIC DISTRIBUTION
!   OF EIGENVALUES WITH APPROXIMATE CONDITION NUMBER conda AND RANK nsvd0 .
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   GENERATE A n-BY-np IID GAUSSIAN MATRIX .
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING MATRIX
!   FORM OF SUBROUTINE normal_random_number3_.
!
    call normal_random_number3_( b(:n,:np) )
!
!   GENERATE A NORMAL RANDOM REAL MATRIX USING VECTOR
!   FORM OF SUBROUTINE normal_random_number_.
!
!    do i = 1_i4b, np
!
!        call normal_random_number3_( b(:n,i) )
!
!    end do
!
!   COMPUTE RANDOM SAMPLE MATRIX.
!
#ifdef _BLAS
    call gemm( 'N', 'N', m, np, n, one, a(1_i4b:m,1_i4b:n), m,       &
               b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:m,1_i4b:np), m  )
#else
    q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
#endif
!
!   DO POWER ITERATIONS.
!
    do i = 1_i4b, niter
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( q(:m,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( q(:m,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
        call gemm( 'T', 'N', n, np, m, one, a(1_i4b:m,1_i4b:n), m,       &
                   q(1_i4b:m,1_i4b:np), m, zero, b(1_i4b:n,1_i4b:np), n  )
#else
        qt(:np,:m) = transpose( q(:m,:np) )
!
        bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
!
        b(:n,:np) = transpose( bt(:np,:n) )
#endif
!
        if ( ortho ) then
!
!           COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!           ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
            call qr_cmp( b(:n,:np), diagr(:np), beta(:np) )
!
!           GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!           q IS ASSUMED OF FULL RANK.
!
            call ortho_gen_qr( b(:n,:np), beta(:np) )
!
        end if
!
!       COMPUTE RANDOM SUBSPACE PROJECTION.
!
#ifdef _BLAS
        call gemm( 'N', 'N', m, np, n, one, a(1_i4b:m,1_i4b:n), m,       &
                   b(1_i4b:n,1_i4b:np), n, zero, q(1_i4b:m,1_i4b:np), m  )
#else
        q(:m,:np) = matmul( a(:m,:n), b(:n,:np) )
#endif
!
    end do
!
!   COMPUTE QR DECOMPOSITION OF RANDOM MATRIX TO OBTAIN AN
!   ORTHONORMAL BASIS OF RANDOM SUBSPACE.
!
    call qr_cmp( q(:m,:np), diagr(:np), beta(:np) )
!
!   GENERATE ORTHOGONAL MATRIX q OF QR DECOMPOSITION OF RANDOM MATRIX.
!   q IS ASSUMED OF FULL RANK.
!
    call ortho_gen_qr( q(:m,:np), beta(:np) )
!
#ifdef _BLAS
    call gemm( 'T', 'N', n, np, m, one, a(1_i4b:m,1_i4b:n), m,       &
               q(1_i4b:m,1_i4b:np), m, zero, b(1_i4b:n,1_i4b:np), n  )
!
!   COMPUTE SVD OF THE FINAL RANDOM MATRIX PROJECTION.
!
    call svd_cmp( b(:n,:np), beta(:np), failure, v=v(:np,:np), sort=sort, max_francis_steps=10_i4b )
!
!   COMPUTE THE APPROXIMATE TOP nsvd LEFT SINGULAR VECTORS OF a .
!
    call gemm( 'N', 'N', m, nsvd, np, one, q(1_i4b:m,1_i4b:np), m,             &
               v(1_i4b:np,1_i4b:np), np, zero, leftvec(1_i4b:m,1_i4b:nsvd), m  )
#else
    qt(:np,:m) = transpose( q(:m,:np) )
!
    bt(:np,:n) = matmul( qt(:np,:m), a(:m,:n) )
!
!   COMPUTE SVD OF THE FINAL RANDOM MATRIX PROJECTION.
!
    call svd_cmp( bt(:np,:n), beta(:np), failure, v=b(:n,:np), sort=sort, max_francis_steps=10_i4b )
!
!   COMPUTE THE APPROXIMATE TOP nsvd LEFT SINGULAR VECTORS OF a .
!
    leftvec(:m,:nsvd) = matmul( q(:m,:np), bt(:np,:nsvd) )
#endif
!
!   EXTRACT THE APPROXIMATE TOP nsvd RIGHT SINGULAR VECTORS OF a .
!
    rightvec(:n,:nsvd) = b(:n,:nsvd)
!
!   EXTRACT THE APPROXIMATE TOP nsvd SINGULAR VALUES OF a .
!
    singval(:nsvd) = beta(:nsvd)
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/norma)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    relerr2 = sqrt(sum( (singval0(nsvd+1_i4b:nsvd0)/norma)**2 ))
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsvd) - u(:n,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        q(:m,:nsvd) = leftvec(:m,:nsvd)*spread( singval(:nsvd), dim=1, ncopies=m)
!
#ifdef _BLAS
        call gemm( 'N', 'N', m, nsvd, n, one, a(1_i4b:m,1_i4b:n), m,                &
                   rightvec(1_i4b:n,1_i4b:nsvd), n, -one, q(1_i4b:m,1_i4b:nsvd), m  )
#else
        q(:m,:nsvd) = matmul( a(:m,:n), rightvec(:n,:nsvd) ) - q(:m,:nsvd)
#endif
!
        beta(:nsvd) = norm( q(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( beta(:nsvd) )/( norma*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsvd)**(t)*u(:n,:nsvd).
!
        call unit_matrix( q(:nsvd,:nsvd) )
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( b(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsvd)**(t)*v(:m,:nsvd).
!
        b(:nsvd,:nsvd) = abs( q(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( b(:nsvd,:nsvd) )/real(n,stnd)
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
#ifdef _BLAS
    deallocate( a, q, b, v, diagr, beta, singval0, singval, leftvec, rightvec )
#else
    deallocate( a, q, qt, b, bt, diagr, beta, singval0, singval, leftvec, rightvec )
#endif
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_random_svd_with_blas
! =======================================
!
end program ex1_random_svd_with_blas

ex1_real_fft.F90

program ex1_real_fft
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine REAL_FFT
!   in module FFT_Procedures for computing the Fast Fourier Transform (FFT)
!   of a real sequence.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, true, false, init_fft, fft, real_fft, end_fft, &
                         merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE REAL SEQUENCE
! AND MUST BE AN EVEN POSITIVE INTEGER.
!
    integer(i4b), parameter :: prtunit=6, n=100000
!
    character(len=*), parameter :: name_proc='Example 1 of real_fft'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, eps, elapsed_time
    real(stnd), dimension(:), allocatable :: y, y2
!
    complex(stnd), dimension(:), allocatable :: yt
!
    integer(i4b) :: nd2
    integer      :: iok, istart, iend, irate, imax, itime
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FORWARD FFT OF A REAL SEQUENCE.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( y(n), y2(n), yt(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF EVEN LENGTH n .
!
    call random_number( y(:n) )
!
!   INITIALIZE THE REAL_FFT SUBROUTINE.
!
    nd2   = n/2
!
    call init_fft( nd2 )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TRANSFORM THE REAL SEQUENCE.
!
    call real_fft( y(:n), yt(:nd2+1), forward=true  )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    call end_fft()
!
!   COMPUTE THE REMAINING VALUES OF THE FOURIER TRANSFORM.
!
    yt(n:nd2+2:-1) = conjg( yt(2:nd2) )
!
!   INITIALIZE THE FFT SUBROUTINE FOR THE BACKWARD TRANSFORM.
!
    call init_fft( n )
!
!   INVERT THE SEQUENCE BACK.
!
    call fft( yt(:n), forward=false )
!
    call end_fft()
!
!   CHECK THAT INVERSE(TRANSFORM(SEQUENCE)) = SEQUENCE.
!
    y2(:n) = real( yt(:n) )
!
    err = maxval(abs(y(:n)-y2(:n)))/maxval(abs(y(:n)))
!
!   DEALLOCATE ARRAYS.
!
    deallocate( y, y2, yt )
!
    if ( err<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i10,a,0pd12.4,a)')    &
      'The elapsed time for computing the forward FFT of a real sequence of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_real_fft
! ===========================
!
end program ex1_real_fft

ex1_real_fft_forward.F90

program ex1_real_fft_forward
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine REAL_FFT_FORWARD 
!   and REAL_FFT_BACKWARD in module FFT_Procedures for computing the Fourier
!   Transform of a real sequence by the Goertzel method.
!
!
! LATEST REVISION : 01/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, real_fft_forward, real_fft_backward, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE REAL SEQUENCE.
!
    integer(i4b), parameter :: prtunit=6, n=100000
!
    character(len=*), parameter :: name_proc='Example 1 of real_fft_forward'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                            :: err, eps, elapsed_time
    real(stnd), dimension(:), allocatable :: y, y2, yi, yr
!
    integer(i4b) :: nd2p1
    integer      :: iok, istart, iend, irate, imax, itime
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FORWARD AND BACWARD FFTS OF A REAL SEQUENCE BY THE GOERTZEL METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
!
    nd2p1 = (n/2) + 1
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( y(n), y2(n), yi(nd2p1), yr(nd2p1), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE.
!
    call random_number( y(:n) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   TRANSFORM THE REAL SEQUENCE.
!
    call real_fft_forward( y(:n), yr(:nd2p1), yi(:nd2p1)  )
!
!   INVERT THE SEQUENCE BACK.
!
    call real_fft_backward( yr(:nd2p1), yi(:nd2p1), y2(:n) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT INVERSE(TRANSFORM(SEQUENCE)) = SEQUENCE.
!
    err = maxval(abs(y(:n)-y2(:n)))/maxval(abs(y(:n)))
!
!   DEALLOCATE ARRAYS.
!
    deallocate( y, y2, yi, yr )
!
    if ( err<=eps  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i10,a,0pd12.4,a)')    &
      'The elapsed time for computing the forward and backward FFTs of a real sequence of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_real_fft_forward
! ===================================
!
end program ex1_real_fft_forward

ex1_reig_cmp.F90

program ex1_reig_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial EigenValue Decomposition (EVD)
!   of a symmetric matrix with randomized power, subspace or block Krylov iterations using
!   subroutine REIG_CMP in module Eig_Procedures.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, seven, c30, c1_e6, merror,    &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,            &
                         eigval_abs_sort, reig_cmp, gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO n),
! neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=10, neig0=1000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of reig_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, tmp, tmp2, anorm, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, eigvec, res, id
    real(stnd), dimension(:),   allocatable :: eigval0, eigval, beta
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, niter, nover, mat_type
!
    logical(lgl) :: failure, extd_samp, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC
!               MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES      (IN ABSOLUTE MAGNITUDE)
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES  (IN ABSOLUTE MAGNITUDE)
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES (IN ABSOLUTE MAGNITUDE)
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS niter .
!
    niter = 6_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover .
!
    nover = 10_i4b
!
!   SPECIFY IF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS ARE USED.
!
    extd_samp = false
!
!   DETERMINE IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE OR BLOCK KRYLOV ITERATIONS, TO AVOID LOSS
!   OF ACCURACY DUE TO ROUNDING ERRORS. THIS IS NOT NEEDED NORMALLY
!   FOR SEMI-DEFINITE POSITIVE MATRIX.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,neig), eigval(neig),  &
              eigval0(neig0), beta(neig0), stat=iok  )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp2 = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/tmp2 )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            eigval0(:neig0-1_i4b) = one
            eigval0(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                eigval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( eigval0(:neig0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( eigval0 ) ) then
!
        if ( .not.all( ieee_is_normal( eigval0(:neig0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
        end if
!
    end if
#endif
!  
!   CHANGE SIGN OF HALF OF THE EIGENVALUES.
!
    call random_number_( beta(:neig0) )
!
    where ( beta(:neig0)>half ) eigval0(:neig0) = -eigval0(:neig0)
!
!   SORT THE EIGENVALUES BY DECREASING ABSOLUTE MAGNITUDE.
!
    call eigval_abs_sort( sort, eigval0(:neig0) )
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( eigval0(:neig0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   reig_cmp COMPUTES A PARTIAL EIGENVALUE DECOMPOSITION (EVD) OF A REAL
!   n-BY-n SYMMETRIC MATRIX a. THE PARTIAL EVD IS WRITTEN
!
!                       U * S * U**(t)
!
!   WHERE S IS AN neig-BY-neig MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN n-BY-neig ORTHONORMAL MATRIX. THE DIAGONAL
!   ELEMENTS OF S ARE THE neig LARGEST EIGENVALUES OF a IN DECREASING ABSOLUTE
!   MAGNITUDE ORDER. THE COLUMNS OF U ARE THE ASSOCIATED EIGENVECTORS OF a.
!
    call reig_cmp( a(:n,:n), eigval(:neig), eigvec(:n,:neig), failure=failure, &
                   niter=niter, nover=nover, ortho=ortho, extd_samp=extd_samp    )
!
!   THE ROUTINE RETURNS THE neig LARGEST EIGENVALUES (IN ABSOLUTE MAGNITUDE) AND THE
!   ASSOCIATED EIGENVECTORS.
!
!   ON EXIT OF reig_cmp :
!
!       eigval CONTAINS THE neig LARGEST EIGENVALUES OF a IN DECREASING ORDER OF
!       ABSOLUTE MAGNITUDE.
!
!       eigvec CONTAINS THE ASSOCIATED neig EIGENVECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RANDOMIZED
!                         POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS. THE RESULTS
!                         CAN BE STILL USEFUL, BUT THE APPROXIMATIONS OF THE neig
!                         TOP EIGEN COUPLETS CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/anorm)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( neig0>neig ) then
        relerr2 = norm( eigval0(neig+1_i4b:neig0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(n,neig), id(neig,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        res(:n,:neig)   = matmul(a,eigvec) - eigvec*spread(eigval(:neig),dim=1,ncopies=n)
        id(:neig,1_i4b) = norm( res(:n,:neig), dim=2_i4b )
!
        err1 = maxval( id(:neig,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( id(:neig,:neig) )
!
        res(:neig,:neig) = abs( id(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( res(:neig,:neig) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, eigval0, eigval, eigvec, beta )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial EVD                                  = ', &
                      neig
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rEVD||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rEVD||_F / ||A||_F ) = ', &
                      relerr2
    write (prtunit,*) 
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from reig_cmp() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_reig_cmp
! ===========================
!
end program ex1_reig_cmp

ex1_reig_pos_cmp.F90

program ex1_reig_pos_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial EigenValue Decomposition (EVD)
!   of a symmetric positive semi-definite matrix with randomized power, subspace or block Krylov
!   iterations, and the Nystrom method, using subroutine REIG_POS_CMP in module SVD_Procedures.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,    &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,      &
                         eigval_sort, reig_pos_cmp, gen_random_sym_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO n),
! neig IS THE TARGET RANK OF THE APPROXIMATE PARTIAL EVD DECOMPOSITION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=10, neig0=1000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of reig_pos_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, tmp, tmp2, anorm, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, eigvec, res, id
    real(stnd), dimension(:),   allocatable :: eigval0, eigval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, niter, nover, mat_type
!
    logical(lgl) :: failure, extd_samp, do_test, ortho, use_nystrom
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUE DECOMPOSITION OF A n-BY-n REAL SYMMETRIC POSITIVE
!               SEMI-DEFINITE MATRIX USING RANDOMIZED POWER SUBSPACE OR BLOCK KRYLOV
!               ITERATIONS AND THE NYSTROM METHOD.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 5_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL EVD.
!
    eps = 0.001_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE EIGEN COUPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED EVD ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS niter .
!
    niter = 8_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover .
!
    nover = 10_i4b
!
!   SPECIFY IF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS ARE USED.
!
    extd_samp = true
!
!   DETERMINE IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE OR BLOCK KRYLOV ITERATIONS, TO AVOID LOSS
!   OF ACCURACY DUE TO ROUNDING ERRORS. THIS IS NOT NEEDED NORMALLY
!   FOR SEMI-DEFINITE POSITIVE MATRIX.
!
    ortho = true
!
!   SPECIFY IF LAST STEP OF THE ALGORITHM IS PERFORMED WITH THE NYSTROM
!   METHOD (AND A SVD) OR AN EIGENVALUE DECOMPOSITION.
!
    use_nystrom = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,neig), eigval(neig), eigval0(neig0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM REAL SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a WITH A
!   SPECIFIED DISTRIBUTION OF EIGENVALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE EIGENVALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp2 = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp = real( i - 1_i4b, stnd )
!
                eigval0(i) = exp( -tmp/tmp2 )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            eigval0(:neig0-1_i4b) = one
            eigval0(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                eigval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( eigval0(:neig0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( eigval0 ) ) then
!
        if ( .not.all( ieee_is_normal( eigval0(:neig0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   SORT THE EIGENVALUES BY DECREASING MAGNITUDE.
!
    call eigval_sort( sort, eigval0(:neig0) )
!
!   GENERATE A SYMMETRIC MATRIX a WITH THE SPECIFIED EIGENVALUES AND RANK neig0.
!
    call gen_random_sym_mat( eigval0(:neig0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE SYMMETRIC POSITIVE MATRIX.
!
    anorm = norm( eigval0(:neig0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   reig_pos_cmp COMPUTES A PARTIAL EIGENVALUE DECOMPOSITION (EVD) OF A REAL
!   n-BY-n SYMMETRIC POSITIVE SEMI-DEFINITE MATRIX a. THE PARTIAL EVD IS WRITTEN
!
!                       U * S * U**(t)
!
!   WHERE S IS AN neig-BY-neig MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN n-BY-neig ORTHONORMAL MATRIX. THE DIAGONAL
!   ELEMENTS OF S ARE THE neig LARGEST EIGENVALUES OF a IN DECREASING MAGNITUDE ORDER.
!   THE COLUMNS OF U ARE THE ASSOCIATED EIGENVECTORS OF a.
!
    call reig_pos_cmp( a(:n,:n), eigval(:neig), eigvec(:n,:neig), failure=failure,  &
                       niter=niter, nover=nover, ortho=ortho, extd_samp=extd_samp,  &
                       use_nystrom=use_nystrom )
!
!   THE ROUTINE RETURNS THE neig LARGEST EIGENVALUES AND THE ASSOCIATED EIGENVECTORS.
!
!   ON EXIT OF reig_pos_cmp :
!
!       eigval CONTAINS THE neig LARGEST EIGENVALUES OF a IN DECREASING ORDER.
!
!       eigvec CONTAINS THE ASSOCIATED neig EIGENVECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RANDOMIZED
!                         POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS. THE RESULTS
!                         CAN BE STILL USEFUL, BUT THE APPROXIMATIONS OF THE neig
!                         TOP EIGEN COUPLETS CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (eigval(1_i4b:neig)/anorm)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( neig0>neig ) then
        relerr2 = norm( eigval0(neig+1_i4b:neig0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE EIGEN COUPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(n,neig), id(neig,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u(:n,:neig) - u(:n,:neig)*diag(eigval(:neig)),
!       WHERE u ARE THE EIGENVECTORS OF a.
!
        res(:n,:neig)   = matmul(a,eigvec) - eigvec*spread(eigval(:neig),dim=1,ncopies=n)
        id(:neig,1_i4b) = norm( res(:n,:neig), dim=2_i4b )
!
        err1 = maxval( id(:neig,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:neig)**(t)*u(:n,:neig).
!
        call unit_matrix( id(:neig,:neig) )
!
        res(:neig,:neig) = abs( id(:neig,:neig) - matmul( transpose(eigvec), eigvec ) )
!
        err2 = maxval( res(:neig,:neig) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, eigval0, eigval, eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial EVD                                  = ', &
                      neig
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rEVD||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rEVD||_F / ||A||_F ) = ', &
                      relerr2
    write (prtunit,*) 
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from reig_pos_cmp() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric positive semi-definite matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_reig_pos_cmp
! ===============================
!
end program ex1_reig_pos_cmp

ex1_reorder.F90

program ex1_reorder
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines DO_INDEX and REORDER
!   in module Sort_Procedures for sorting integer and real sequences by means of an index.
!
!
! LATEST REVISION : 29/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, arth, do_index, reorder
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE ORDER OF THE SEQUENCES.
!
    integer(i4b), parameter :: prtunit=6, n=100
!
    character(len=*), parameter :: name_proc='Example 1 of reorder'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n) :: x
!
    integer(i4b)               :: i, j, k, i1, i2
    integer(i4b), dimension(n) :: y, indexx, indexy
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE RANDOM REAL DATA TO SORT.
!
    call random_number( x )
!
!   INITIALIZE PERMUTATION TO THE IDENTITY.
!
    y = arth( 1_i4b, 1_i4b, n ) 
!    
!   GENERATE A RANDOM ORDERING OF THE INTEGERS 1 TO n.
!   STARTING AT THE END, SWAP THE CURRENT LAST INDICATOR WITH ONE
!   RANDOMLY CHOSEN FROM THOSE PRECEEDING IT.

    do i = n, 2, -1
        j = 1 + i * x(i)
        if (j < i) then
            k    = y(i)
            y(i) = y(j)
            y(j) = k
        end if
    end do
!
!   COMPUTE INDEX FOR EACH ARRAY.
!
    call do_index( x, indexx )
    call do_index( y, indexy )
!
!   EXAMPLE 1 : SORT THE REAL DATA IN ASCENDING ORDER 
!               BY MEANS OF THE INDEX .
!
    call reorder( indexx, x  )
!
!   CHECK THAT THE SORTED ARRAY IS NON-DECREASING.
!
    i1 = count( x(1:n-1) > x(2:n) )
!
!   EXAMPLE 2 : SORT THE INTEGER DATA IN DECREASING ORDER
!               BY MEANS OF THE INDEX .
!
    call reorder( indexy, y, ascending=false )
!
!   CHECK THAT THE SORTED ARRAY IS NON-INCREASING.
!
    i2 = count( y(1:n-1) < y(2:n) )
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( i1==0 .and. i2==0 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_reorder
! ==========================
!
end program ex1_reorder

ex1_rqb_cmp.F90

program ex1_rqb_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a randomized partial QB or QR factorization of a matrix
!   using subroutine RQB_CMP in module Random.
!                                                                              
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, ten, c30, merror, allocate_error,  &
                         norm, unit_matrix, random_seed_, singval_sort, rqb_cmp, gen_random_mat
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
! nqb IS THE TARGET RANK OF THE RANDOMIZED PARTIAL QB OR QR FACTORIZATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=10000, nsvd0=1000, nqb=5
!
    character(len=*), parameter :: name_proc='Example 1 of rqb_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, eps, elapsed_time, norma, normb, &
                                               tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, q, b, res, id
    real(stnd), dimension(:),   allocatable :: singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, i, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
!
    logical(lgl) :: ortho, comp_qr, pivoting, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QB OR QR FACTORIZATION OF A m-BY-n REAL MATRIX USING A RANDOMIZED
!               ALGORITHM AS
!
!                              a(:m,:n) ≈ q(:m,:nqb)*b(:nqb,:n)
!
!               WHERE q IS A m-BY-nqb MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqb-BY-n MATRIX AND
!               THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!               NORM.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QB OR QR FACTORIZATION.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ORTHOGONALITY OF THE q MATRIX.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QB OR QR ALGORITHM.
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter TO BE PERFORMED.
!
    niter = 2_i4b
!
!   DETERMINE IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS
!   OF ACCURACY DUE TO ROUNDING ERRORS.
!
    ortho = true
!
!   DETERMINE IF A QB OR QR FACTORIZATION IS COMPUTED.
!
    comp_qr = true
!
!   DETERMINE IF A QR FACTORIZATION WITH COLUMN PIVOTING IS COMPUTED.
!
    pivoting = true
!
!   ALLOCATE WORK ARRAYS.
!
    if ( pivoting ) then
!
        allocate( a(m,n), q(m,nqb), b(nqb,n), ip(n), singval0(nsvd0), stat=iok )
!
    else
!
        allocate( a(m,n), q(m,nqb), b(nqb,n), singval0(nsvd0), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqb_cmp COMPUTES AN APPROXIMATE PARTIAL QB OR QR DECOMPOSITION OF A REAL m-BY-n MATRIX a
!   USING A RANDOMIZED ALGORITHM AND SUBSPACE ITERATIONS. THE PARTIAL QB OR QR DECOMPOSITION
!   IS WRITTEN
!
!                  a(:m,:n) ≈ q(:m,:nqb)*b(:nqb,:n)
!
!   WHERE q IS A m-BY-nqb MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqb-BY-n (FULL OR TRAPEZOIDAL)
!   MATRIX AND THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!   NORM.
!
    if ( pivoting ) then
!
        call rqb_cmp( a(:m,:n), q(:m,:nqb), b(:nqb,:n), niter=niter, ortho=ortho, ip=ip(:n) )
!
    else
!
        call rqb_cmp( a(:m,:n), q(:m,:nqb), b(:nqb,:n), niter=niter, ortho=ortho, comp_qr=comp_qr )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR OF THE QB OR QR DECOMPOSITION.
!
    normb = norm( b(:nqb,:n) )
!
    tmp = one - (normb/norma)**2
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE BEST RELATIVE ERROR FROM THE SVD.
!
    relerr2 = sqrt(sum( (singval0(nqb+1_i4b:nsvd0)/norma)**2 ))
!
!   COMPUTE ERROR BETWEEN THE BEST AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE ORTHONORMAL MATRIX Q IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(nqb,nqb), id(nqb,nqb), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqb)**(t)*q(:m,:nqb).
!
        call unit_matrix( id(:nqb,:nqb) )
!
        res(:nqb,:nqb) = abs( id(:nqb,:nqb) - matmul( transpose(q(:m,:nqb)), q(:m,:nqb) ) )
!
        err1 = maxval( res(:nqb,:nqb) )/real(m,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( pivoting ) then
        deallocate( a, singval0, q, b, ip )
    else
        deallocate( a, singval0, q, b )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial QB or QR decomposition              = ', &
                      nqb
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-Q*B||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-Q*B||_F / ||A||_F ) = ', &
                      relerr2
    write (prtunit,*) 
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Orthogonality of the computed orthonormal matrix Q = ', err1
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QB or QR factorization of rank ', nqb, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqb_cmp
! ==========================
!
end program ex1_rqb_cmp

ex1_rqb_cmp_fixed_precision.F90

program ex1_rqb_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a randomized partial QB or QR factorization of a matrix,
!   which fullfills a given relative error in Frobenius norm using subroutine RQB_CMP_FIXED_PRECISION
!   in module Random.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, three, seven, c30, allocate_error, &
                         merror, norm, unit_matrix, random_seed_, singval_sort, gen_random_mat,          &
                         rqb_cmp_fixed_precision
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX,
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=5000, nsvd0=1000
!   
! relerr0 IS THE REQUESTED TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QB OR QR FACTORIZATION IN FROBENIUS NORM.
!
    real(stnd), parameter  :: relerr0=0.5_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of rqb_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, eps, elapsed_time, norma, &
                                               tmp, relerr, relerr2
    real(stnd), dimension(:,:), allocatable :: a, res, id
    real(stnd), dimension(:),   allocatable :: singval0
!
    real(stnd), dimension(:,:), pointer :: q, b
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: blk_size, niter, niter_qb, maxiter_qb, nqb, i, mat_type
    integer(i4b), allocatable, dimension(:) :: ip
!
    logical(lgl) :: ortho, reortho, comp_qr, pivoting, failure_relerr, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL QB OR QR FACTORIZATION OF RANK nqb OF A m-BY-n REAL MATRIX USING A RANDOMIZED
!               ALGORITHM AS
!
!                              a(:m,:n) ≈ q(:m,:nqb)*b(:nqb,:n)
!
!               WHERE q IS A m-BY-nqb MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqb-BY-n MATRIX AND
!               THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!               NORM. The RANK nqb IS DETERMINED SUCH THAT THE ASSOCIATED QB OR QR APPROXIMATION FULLFILLS
!               A PRESCRIBED RELATIVE ERROR IN THE FROBENIUS NORM AND IS NOT KNOWN IN ADVANCE.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QB OR QR FACTORIZATION.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ORTHOGONALITY OF THE q MATRIX.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QB OR QR ALGORITHM.
!
!   blk_size*maxiter_qb IS THE MAXIMUM ALLOWABLE RANK OF THE
!   PARTIAL QB OR QR FACTORIZATION, WHICH IS SOUGHT.
!
    blk_size   = 10_i4b
    maxiter_qb = 20_i4b
!
!   DETERMINE THE NUMBER OF POWER OR SUBSPACE ITERATIONS niter TO BE PERFORMED
!   IN THE FIRST STEP OF THE QB OR QR FACTORIZATION.
!
    niter = 1_i4b
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS.
!
    ortho = true
!
!   SPECIFY IF REORTHONORMALIZATION IS CARRIED OUT TO AVOID LOSS OF ORTHOGONALITY
!   IN THE BLOCK GRAM-SCHMIDT ORTHOGONALISATION SCHEME USED TO BUILD THE ORTHONORMAL
!   MATRIX OF THE QB OR QR DECOMPOSITION OF THE INPUT MATRIX.
!
    reortho = true
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter_qb TO BE PERFORMED
!   IN THE LAST STEP OF THE QB FACTORIZATION.
!
    niter_qb = 1_i4b
!
!   DETERMINE IF A QB OR QR FACTORIZATION IS COMPUTED.
!
    comp_qr = true
!
!   DETERMINE IF A QR FACTORIZATION WITH COLUMN PIVOTING IS COMPUTED.
!
    pivoting = true
!
!   ALLOCATE WORK ARRAYS.
!
    if ( pivoting ) then
       allocate( a(m,n), ip(n), singval0(nsvd0), stat=iok )
    else
       allocate( a(m,n), singval0(nsvd0), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqb_cmp_fixed_precision COMPUTES AN APPROXIMATE PARTIAL QB OR QR DECOMPOSITION OF A REAL m-BY-n MATRIX a,
!   WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET TOLERANCE FOR ITS RELATIVE ERROR
!   IN THE FROBENIUS NORM:
!
!                                || A - Q*B ||_F <= ||A||_F * relerr
!
!   , WHERE Q*B IS THE COMPUTED PARTIAL QB OR QR APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL QB OR QR
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   HERE THE RANK OF THE PARTIAL QB OR QR DECOMPOSITION IS NOT KNOWN IN ADVANCE AND THE ARGUMENTS q
!   AND b, WHICH WILL CONTAIN THE TWO FACTORS OF THE QB OR QR DECOMPOSITION MUST BE SPECIFIED AS REAL
!   ARRAY POINTERS.
!
!   On EXIT, nqb = size( q, 2 ) = size( b, 1 ) IS THE RANK OF THE COMPUTED PARTIAL QB OR QR APPROXIMATION.
!
!   FIRST SET THE TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QB OR QR FACTORIZATION IN FROBENIUS NORM.
!
    relerr = relerr0
!
!   NULLIFY THE POINTERS q AND b SO THAT THEIR STATUT CAN BE CHECKED INSIDE
!   rqb_cmp_fixed_precision SUBROUTINE.
!
    nullify( q, b )
!
    if ( pivoting ) then
!
        call rqb_cmp_fixed_precision( a(:m,:n), relerr, q, b, failure_relerr=failure_relerr,     &
                                      niter=niter, blk_size=blk_size, maxiter_qb=maxiter_qb,     &
                                      ortho=ortho, reortho=reortho, niter_qb=niter_qb, ip=ip(:n) )
!
    else
!
        call rqb_cmp_fixed_precision( a(:m,:n), relerr, q, b, failure_relerr=failure_relerr, &
                                      niter=niter, blk_size=blk_size, maxiter_qb=maxiter_qb, &
                                      ortho=ortho, reortho=reortho, niter_qb=niter_qb,       &
                                      comp_qr=comp_qr )
!
    end if
!
!   THE ROUTINE RETURNS THE TWO FACTORS OF A PARTIAL QB OR QR DECOMPOSITION, WHICH FULFILLS
!   THE PRESET TOLERANCE SPECIFIED IN ARGUMENT relerr.
!
!   ON EXIT OF rqb_cmp_fixed_precision :
!
!       relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM OF THE COMPUTED PARTIAL QB OR QR DECOMPOSITION.
!
!       POINTER q CONTAINS THE OTHONORMAL MATRIX Q OF THE COMPUTED PARTIAL QB OR QR DECOMPOSITION.
!   
!       POINTER b CONTAINS THE ASSOCIATED MATRIX B OF THE COMPUTED PARTIAL QB OR QR DECOMPOSITION.
!
!       failure_relerr = false : INDICATES SUCCESSFUL EXIT AND THE COMPUTED PARTIAL QB OR QR
!       APPROXIMATION FULFILLS THE REQUESTED RELATIVE ERROR SPECIFIED ON ENTRY IN THE ARGUMENT relerr.
!
!       failure_relerr = true  : INDICATES THAT THE COMPUTED PARTIAL QB OR QR FACTORIZATION HAS A RELATIVE ERROR
!       LARGER THAN THE REQUESTED RELATIVE ERROR. THIS MEANS THAT THE REQUESTED ACCURACY TOLERANCE
!       FOR THE RELATIVE ERROR IS TOO SMALL (I.E., relerr < 2 * sqrt( epsilon( relerr )/relerr )
!       OR THAT THE INPUT PARAMETERS blk_size AND/OR maxiter_qb HAVE A TOO SMALL VALUE (E.G., THE
!       PRODUCT blk_size*maxiter_qb SETS THE MAXIMUM ALLOWABLE RANK FOR THE PARTIAL QB OR QR APPROXIMATION,
!       WHICH IS SOUGHT), GIVEN THE DISTRIBUTION OF THE SINGULAR VALUES OF mat, AND MUST BE INCREASED
!       TO FULLFILL THE PRESET ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL QB OR QR APPROXIMATION.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   GET THE RANK OF THE COMPUTED PARTIAL QB OR QR FACTORIZATION.
!
    nqb = size( q, 2 )
!
!   COMPUTE THE BEST RELATIVE ERROR FROM THE SVD.
!
    relerr2 = norm( singval0(nqb+1_i4b:nsvd0)/norma )
!
!   COMPUTE ERROR BETWEEN THE BEST AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE ORTHONORMAL MATRIX Q IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(nqb,nqb), id(nqb,nqb), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqb)**(t)*q(:m,:nqb).
!
        call unit_matrix( id(:nqb,:nqb) )
!
        res(:nqb,:nqb) = abs( id(:nqb,:nqb) - matmul( transpose(q(:m,:nqb)), q(:m,:nqb) ) )
!
        err1 = maxval( res(:nqb,:nqb) )/real(m,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS AND POINTERS.
!
    if ( pivoting ) then
        deallocate( a, singval0, q, b, ip )
    else
        deallocate( a, singval0, q, b )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure_relerr ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial QB or QR decomposition              = ', &
                      nqb
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-Q*B||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-Q*B||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( failure_relerr ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Fail to converge within ', maxiter_qb,   &
                          ' iterations! ||A-Q*B||_F / ||A||_F = ', relerr, ' >= ', relerr0
        write (prtunit,*) 
!
    else
!
        write (prtunit,*) 
        write (prtunit,*) 'Converge within less than ', maxiter_qb ,' iterations! ||A-Q*B||_F / ||A||_F = ', &
                          relerr, ' < ', relerr0
        write (prtunit,*) 
!
    end if
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Orthogonality of the computed orthonormal matrix Q = ', err1
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QB or QR factorization of rank ', nqb, ' of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqb_cmp_fixed_precision
! ==========================================
!
end program ex1_rqb_cmp_fixed_precision

ex1_rqb_solve.F90

program ex1_rqb_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine RQB_CMP in module Random
!   and RQB_SOLVE in module LLSQ_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 05/02/2021
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, zero, one, seven, ten, c30, c100, &
                         allocate_error, merror, rqb_cmp, rqb_solve, norm, random_seed_,      &
                         random_number_ , singval_sort, gen_random_mat
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED RANDOM MATRIX,
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n ), nsvd0=1000, nrhs=10
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 1 of rqb_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err1, err2, tmp, anorm, cnorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: singval0, tau
    real(stnd), allocatable, dimension(:,:) :: a, q, b, c, x, res
!
    integer(i4b)                            :: nqb, niter, mat_type, i, j, l
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: ortho, comp_qr, pivoting
!   
    character :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A RANDOMIZED COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT
!               MATRIX AND SEVERAL RIGHT HAND-SIDES. THE COEFFICIENT MATRIX CAN BE RANK DEFICIENT
!               AND BOTH m>=n OR m<n ARE PERMITTED.
!
!               COMPUTE APPROXIMATE SOLUTION MATRIX x FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ c(:m,:nrhs) .
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QB OR QR ALGORITHM.
!
!   DETERMINE THE TARGET RANK OF THE RANDOMIZED PARTIAL QB/QR FACTORIZATION, WHICH IS SOUGHT.
!
    nqb = 40_i4b
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter TO BE PERFORMED.
!
    niter = 2_i4b
!
!   DETERMINE IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS
!   OF ACCURACY DUE TO ROUNDING ERRORS.
!
    ortho = true
!
!   DETERMINE IF A QB OR QR FACTORIZATION IS COMPUTED.
!
    comp_qr = true
!
!   DETERMINE IF A QR FACTORIZATION WITH COLUMN PIVOTING IS COMPUTED.
!
    pivoting = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE B OR R FACTOR IN THE rqb_cmp SUBROUTINE.
!
    tol = ulp
!
!   ALLOCATE WORK ARRAYS.
!
    if ( pivoting ) then
!
        allocate( a(m,n), q(m,nqb), b(nqb,n), c(m,nrhs), x(n,nrhs), singval0(nsvd0),  &
                  tau(nqb), ip(n), res(nrhs,n), stat=iok )
!
    else
!
        allocate( a(m,n), q(m,nqb), b(nqb,n), c(m,nrhs), x(n,nrhs), singval0(nsvd0),  &
                  tau(nqb), res(nrhs,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            err1 = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/err1 )
!        
            end do
!
    end select
!
!   SORT THE SINGULAR VALUES.
!
    call singval_sort( sort, singval0(:nsvd0) )
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a(:m,:n) )
!
!   COMPUTE THE FROBENIUS NORM OF THE COEFFICIENT MATRIX.
!
!    anorm = norm( a(:m,:n) )
    anorm = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   GENERATE A RIGHT HAND-SIDE MATRIX.
!
    call random_number_( c(:m,:nrhs) )
!
!   COMPUTE FROBENIUS NORM OF THE RIGHT HAND-SIDE MATRIX.
!
    cnorm = norm( c(:m,:nrhs) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED COMPLETE ORTHOGONAL DECOMPOSITION OF RANDOM DATA MATRIX
!   WITH SUBROUTINE rqb_cmp.
!
!   rqb_cmp COMPUTES AN APPROXIMATE PARTIAL QB, QR OR COMPLETE ORTHOGONAL DECOMPOSITION OF A REAL m-BY-n
!   MATRIX a USING A RANDOMIZED ALGORITHM AND SUBSPACE ITERATIONS. THE PARTIAL QB OR QR DECOMPOSITION
!   IS WRITTEN
!
!                  a(:m,:n) ≈ q(:m,:nqb)*b(:nqb,:n)
!
!   WHERE q IS A m-BY-nqb MATRIX WITH ORTHONORMAL COLUMNS, b IS A nqb-BY-n (FULL OR TRAPEZOIDAL)
!   MATRIX AND THE PRODUCT q*b IS A GOOD APPROXIMATION OF a ACCORDING TO THE SPECTRAL OR FROBENIUS
!   NORM.
!
    if ( pivoting ) then
!
        call rqb_cmp( a(:m,:n), q(:m,:nqb), b(:nqb,:n), niter=niter, ortho=ortho, ip=ip(:n), &
                      tol=tol, tau=tau(:nqb) )
!
    else
!
        call rqb_cmp( a(:m,:n), q(:m,:nqb), b(:nqb,:n), niter=niter, ortho=ortho, comp_qr=comp_qr, &
                      tol=tol, tau=tau(:nqb) )
!
    end if
!
!   HERE THE ROUTINE COMPUTES AN APPROXIMATE COMPLETE ORTHOGONAL FACTORIZATION OF a USING
!   A RANDOMIZED ALGORITHM.
!                      
!   THIS APPROXIMATE COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR (RANK DEFICIENT) LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE rqb_solve.
!
!   NEXT, COMPUTE THE SOLUTION MATRIX FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   WITH SUBROUTINE rqb_solve AND THE APPROXIMATE COMPLETE ORTHOGONAL DECOMPOSITION OUTPUT BY rqb_cmp.
!
    call rqb_solve( q(:m,:nqb), b(:nqb,:n), c(:m,:nrhs), x(:n,:nrhs), ip(:n),  &
                    tau=tau(:nqb), comp_resid=true )
!
!   rqb_solve COMPUTES APPROXIMATES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || c - a*x ||_2
!
!   USING A RANDOMIZED QR FACTORIZATION WITH COLUMNS PIVOTING OR COMPLETE
!   ORTHOGONAL FACTORIZATION OF a AS COMPUTED BY rqb_cmp. a IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. m>=n OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS c AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL OF rqb_solve; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX c AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS c AND x MAY
!   BE VECTORS OR MATRICES. c IS OVERWRITTEN BY rqb_solve.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX c - a*x is COMPUTED AND OVERWRITES c.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF rqb_cmp AND rqb_solve SUBROUTINES. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE ORTHONORMAL MATRIX q .
!
    res(:nrhs,:nqb) = matmul( transpose( c(:m,:nrhs) ), q(:m,:nqb) )
!
    err1 = maxval( abs(res(:nrhs,:nqb)) )
!
!   CHECK THAT THE RESIDUALS ARE ALMOST ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
    res(:nrhs,:n) = matmul( transpose( c(:m,:nrhs) ), a(:m,:n) )
!
!    err2 = maxval( abs(res(:nrhs,:n)) )/(cnorm*anorm)
    
    err2 = norm( res(:nrhs,:n) )/(cnorm*anorm)
!
!   PRINT THE RESULTS OF THE TEST.
!
    if ( err1<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*)
    write (prtunit,*) 'Orthogonality of the residual matrix to the range of the coefficient matrix = ', err2
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( pivoting ) then
        deallocate( a, q, b, c, x, res, singval0, tau, ip )
    else
        deallocate( a, q, b, c, x, res, singval0, tau )
    end if
!
!
! END OF PROGRAM ex1_rqb_solve
! ============================
!
end program ex1_rqb_solve

ex1_rqlp_cmp.F90

program ex1_rqlp_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute a randomized partial QLP decomposition
!   using subroutine RQLP_CMP in module SVD_Procedures.
!
!
! LATEST REVISION : 24/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,  &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,    &
                         gen_random_mat, rqlp_cmp, singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn),
! nqlp IS THE TARGET RANK OF THE RANDOMIZED PARTIAL QLP FACORIZATION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=300, nqlp=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqlp_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, eps, elapsed_time, anorm, lnorm, tmp, tmp2, &
                  relerr, relerr2, abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, qmat, lmat, pmat, res, id
    real(stnd), dimension(:),   allocatable :: singval0, lval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, niter_qrql, i, mat_type
!
    logical(lgl) :: ortho, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : RANDOMIZED PARTIAL QLP DECOMPOSITION OF A m-BY-n REAL MATRIX USING SUBSPACE
!               ITERATIONS AND GAUSSIAN COMPRESSION IN THE FIRST STAGE OF THE ALGORITHM AND
!               QR-QL ITERATIONS IN A FINAL STAGE FOR IMPROVING THE ACCURACY OF THE L-VALUES.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL QLP FACTORIZATION COMPARED TO THE BEST SVD
!   APPROXIMATION.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE RANDOMIZED
!   PARTIAL QLP DECOMPOSITION.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMZED PARTIAL QLP ALGORITHM.
!
!   CHOOSE THE NUMBER OF SUBSPACE ITERATIONS TO BE PERFORMED IN THE FIRST PHASE
!   OF THE RANDOMIZED PARTIAL QLP ALGORITHM FOR IMPROVING ITS QUALITY.
!
    niter = 4_i4b
!
!   DETERMINE IF ORTHONORMALIZATION IS PERFORMED BETWEEN EACH STEP OF THE SUBSPACE ITERATIONS.
!
    ortho = true
!
!   CHOOSE THE NUMBER OF QR-QL ITERATIONS TO BE PERFORMED FOR IMPROVING THE QUALITY OF THE L-VALUES.
!
    niter_qrql = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), qmat(m,nqlp), pmat(nqlp,n), lmat(nqlp,nqlp), &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
    call singval_sort( 'D', singval0(:nsvd0) )
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE GENERATED MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqlp_cmp COMPUTES A RANDOMIZED PARTIAL QLP DECOMPOSITION OF A REAL m-BY-n MATRIX a. 
!   THE PARTIAL QLP IS WRITTEN
!
!                       a  ≈ Q * L * P
!
!   WHERE L IS AN nqlp-BY-nqlp LOWER TRIANGULAR MATRIX WHOSE DIAGONAL ELEMENTS (IN ABSOLUTE VALUE) ARE
!   GOOD APPROXIMATIONS OF THE nqlp LARGEST SINGULAR VALUES OF a SORTED IN DECREASING ORDER (E.G,. THE
!   SO-CALLED L-VALUES), Q IS AN m-BY-nqlp ORTHONORMAL MATRIX, AND L IS AN nqlp-BY-n ORTHONORMAL MATRIX
!   STORED ROWWISE. THE QUALITY OF THE COMPUTED PARTIAL QLP DECOMPOSITION CAN BE IMPROVED BY niter
!   INITIAL SUBSPACE ITERATIONS. THE QUALITY OF L-VALUES CAN ALSO BE IMPROVED BY ADDITIONAL QR-QL ITERATIONS
!   IF REQUIRED.
!
    call rqlp_cmp( a(:m,:n), lmat(:nqlp,:nqlp), qmat(:m,:nqlp), pmat(:nqlp,:n), &
                   niter=niter, ortho=ortho, niter_qrql=niter_qrql )
!
!   THE ROUTINE RETURNS THE PARTIAL QLP FACTORIZATION EXPLICITLY IN THE ARRAY ARGUMENTS lmat, qmat AND pmat
!   SPECIFIED IN INPUT OF rqlp_cmp SUBROUTINE.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR IN FROBENIUS NORM
!   FOR THE PARTIAL QLP DECOMPOSITION OF RANK nqlp.
!
    lnorm = norm( lmat(:nqlp,:nqlp) )
!
    tmp = one -  (lnorm/anorm)**2
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE BEST RELATIVE ERROR IN FROBENIUS NORM
!   FOR A PARTIAL SVD DECOMPOSITION OF RANK nqlp.
!
    if ( nsvd0>nqlp ) then
        relerr2 = norm( singval0(nqlp+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE BEST AND QLP RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE QLP FACTORS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( lval(nqlp), res(m,nqlp), id(nqlp,nqlp), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       GET THE L-VALUES.
!
        do i = 1_i4b, nqlp
            lval(i) = abs( lmat(i,i) )
        end do
!
!       COMPUTE ERRORS FOR THE L-VALUES AS ESTIMATES OF THE SINGULAR VALUES.
!
        i = min( nqlp, nsvd0 )
!
        id(:i,1_i4b) = singval0(:i)
        id(i+1_i4b:nqlp,1_i4b) = zero
!
        where( id(:nqlp,1_i4b)/=zero )
            res(:nqlp,1_i4b) = id(:nqlp,1_i4b)
        elsewhere
            res(:nqlp,1_i4b) = one
        end where
!
!       ABSOLUTE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        abs_err = maxval( abs( lval(:nqlp) - id(:nqlp,1_i4b) ) )
!
!       RELATIVE ERRORS OF L-VALUES AS ESTIMATES OF SINGULAR VALUES.
!
        rel_err = maxval( abs( (lval(:nqlp) - id(:nqlp,1_i4b))/res(:nqlp,1_i4b) ) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - q(:m,:nqlp)**(t)*q(:m,:nqlp).
!
        call unit_matrix( id(:nqlp,:nqlp) )
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( transpose(qmat), qmat ) )
!
        err1 = maxval( res(:nqlp,:nqlp) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - p(:nqlp,:n)*p(:nqlp,:n)**(t).
!
        res(:nqlp,:nqlp) = abs( id(:nqlp,:nqlp) - matmul( pmat, transpose(pmat) ) )
!
        err2 = maxval( res(:nqlp,:nqlp) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( lval, res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, lmat, qmat, pmat )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the randomized partial QLP approximation                     = ', &
                      nqlp
    write (prtunit,*) 'Number of subspaces iterations performed                             = ', &
                      niter
    write (prtunit,*) 'Number of QR-QL iterations performed                                 = ', &
                      niter_qrql
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-QLP||_F / ||A||_F         = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Absolute accuracy of the computed L-values    = ', abs_err
        write (prtunit,*) 'Relative accuracy of the computed L-values    = ', rel_err
        write (prtunit,*) 'Orthogonality of the computed Q matrix        = ', err1
        write (prtunit,*) 'Orthogonality of the computed P matrix        = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized partial QLP approximation of rank ', nqlp, &
      ' of a ', m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqlp_cmp
! ===========================
!
end program ex1_rqlp_cmp

ex1_rqlp_svd_cmp.F90

program ex1_rqlp_svd_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with a randomized QLP-SVD
!   algorithm and subspace iterations using subroutine RQLP_SVD_CMP in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,  &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,    &
                         gen_random_mat, rqlp_svd_cmp, singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn)
! FOR CASES GREATER THAN 0,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=10000, mn=min(m,n), nsvd0=5000, nsvd=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqlp_svd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, tmp, tmp2, relerr, relerr2, &
                  abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, leftvec, rightvec, res, id
    real(stnd), dimension(:),   allocatable :: singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, blk_size, nover, nover_svd, i, mat_type
!
    logical(lgl) :: failure, random_qr, truncated_qr, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING A RANDOMIZED PARTIAL
!               QLP DECOMPOSITION AND SUBSPACE ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 8_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED ALGORITHM.
!
!   DETERMINE IF A RANDOMIZED PARTIAL QLP ALGORITHM IS USED IN THE FIRST PHASE OF
!   THE QLP-SVD ALGORITHM.
!
    random_qr = true
!
!   DETERMINE IF A RANDOMIZED PARTIAL AND TRUNCATED QLP ALGORITHM IS USED IN THE FIRST PHASE OF
!   THE QLP-SVD ALGORITHM.
!
    truncated_qr = false
!
!   DETERMINE THE BLOCK SIZE USED IN THE RANDOMIZED PARTIAL QLP PHASE OF THE ALGORITHM.
!
    blk_size = 60_i4b
!    blk_size = 30_i4b
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter TO BE PERFORMED.
!
    niter = 2_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QLP PHASE OF THE ALGORITHM.
!
    nover = 10_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover_svd FOR THE FINAL SVD PHASE OF THE ALGORITHM.
!
    nover_svd = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval(nsvd), leftvec(m,nsvd), rightvec(n,nsvd),   &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqlp_svd_cmp COMPUTES A PARTIAL SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V**(t)
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
    call rqlp_svd_cmp( a(:m,:n), singval(:nsvd), leftvec(:m,:nsvd), rightvec(:n,:nsvd),              &
                       failure=failure, niter=niter, random_qr=random_qr, truncated_qr=truncated_qr, &
                       blk_size=blk_size, nover=nover, nover_svd=nover_svd )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF rqlp_svd_cmp :
!
!       singval CONTAINS THE nsvd LARGEST SINGULAR VALUES OF a IN DECREASING ORDER.
!
!       leftvec CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!   
!       rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE (RANDOMIZED)
!                         PARTIAL QLP-SVD ALGORITHM. THE RESULTS CAN BE STILL USEFUL,
!                         BUT THE APPROXIMATIONS OF SOME OF THE nsvd TOP SINGULAR
!                         TRIPLETS CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/anorm)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( mat_type>0_i4b ) then
!
        if ( nsvd0>nsvd ) then
            relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
        else
            relerr2 = zero
        end if
!
!       COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
        err = abs( relerr - relerr2 )
!
    else
!
        err = zero
!
    end if
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            i = min( nsvd, nsvd0 )
!
            id(:i,1_i4b) = singval0(:i)
            id(i+1_i4b:nsvd,1_i4b) = zero
!
            where( id(:nsvd,1_i4b)/=zero )
                res(:nsvd,1_i4b) = id(:nsvd,1_i4b)
            elsewhere
                res(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( singval(:nsvd) - id(:nsvd,1_i4b) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (singval(:nsvd) - id(:nsvd,1_i4b))/res(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr
!
    if ( mat_type>0_i4b ) then
!
        write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                          relerr2
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rqlp_svd_cmp() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*)
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqlp_svd_cmp
! ===============================
!
end program ex1_rqlp_svd_cmp

ex1_rqlp_svd_cmp2.F90

program ex1_rqlp_svd_cmp2
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with a randomized QLP-SVD
!   algorithm and subspace iterations using subroutine RQLP_SVD_CMP2 in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror,  &
                         allocate_error, norm, unit_matrix, random_seed_, random_number_,    &
                         rqlp_svd_cmp2, gen_random_mat, singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn)
! FOR CASES GREATER THAN 0,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=10000, mn=min(m,n), nsvd0=5000, nsvd=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqlp_svd_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, tmp, tmp2, relerr, relerr2, &
                  abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, leftvec, rightvec, res, id
    real(stnd), dimension(:),   allocatable :: singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, nover, nover_svd, i, mat_type
!
    logical(lgl) :: failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING A RANDOMIZED PARTIAL
!               QLP DECOMPOSITION AND SUBSPACE ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 8_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED ALGORITHM.
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter TO BE PERFORMED.
!
    niter = 2_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QLP PHASE OF THE ALGORITHM.
!
    nover = 10_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover_svd FOR THE FINAL SVD PHASE OF THE ALGORITHM.
!
    nover_svd = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval(nsvd), leftvec(m,nsvd), rightvec(n,nsvd),   &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqlp_svd_cmp2 COMPUTES AN APPROXIMATE  PARTIAL SINGULAR VALUE DECOMPOSITION (SVD)
!   OF A REAL m-BY-n MATRIX a. THE PARTIAL SVD IS WRITTEN
!
!                       a ≈ U * S * V**(t)
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS DIAGONAL ELEMENTS,
!   U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND V IS AN n-BY-nsvd ORTHONORMAL MATRIX.
!   THE DIAGONAL ELEMENTS OF S ARE APPROXIMATIONS OF THE nsvd LARGEST SINGULAR VALUES OF a;
!   THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE APPROXIMATIONS OF THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
    call rqlp_svd_cmp2( a(:m,:n), singval(:nsvd), leftvec(:m,:nsvd), rightvec(:n,:nsvd), &
                        failure=failure, niter=niter, nover=nover, nover_svd=nover_svd   )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF rqlp_svd_cmp2 :
!
!       singval CONTAINS THE nsvd LARGEST SINGULAR VALUES OF a IN DECREASING ORDER.
!
!       leftvec CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!   
!       rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RANDOMIZED
!                         PARTIAL QLP-SVD ALGORITHM. THE RESULTS CAN BE STILL USEFUL,
!                         BUT THE APPROXIMATIONS OF SOME OF THE nsvd TOP SINGULAR
!                         TRIPLETS CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/anorm)**2 )
    relerr = sqrt( max( tmp, zero ) )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( mat_type>0_i4b ) then
!
        if ( nsvd0>nsvd ) then
            relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
        else
            relerr2 = zero
        end if
!
!       COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
        err = abs( relerr - relerr2 )
!
    else
!
        err = zero
!
    end if
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            i = min( nsvd, nsvd0 )
!
            id(:i,1_i4b) = singval0(:i)
            id(i+1_i4b:nsvd,1_i4b) = zero
!
            where( id(:nsvd,1_i4b)/=zero )
                res(:nsvd,1_i4b) = id(:nsvd,1_i4b)
            elsewhere
                res(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( singval(:nsvd) - id(:nsvd,1_i4b) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (singval(:nsvd) - id(:nsvd,1_i4b))/res(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate randomized partial SVD                       = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr
!
    if ( mat_type>0_i4b ) then
!
        write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                          relerr2
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rqlp_svd_cmp2() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*)
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqlp_svd_cmp2
! ================================
!
end program ex1_rqlp_svd_cmp2

ex1_rqlp_svd_cmp_fixed_precision.F90

program ex1_rqlp_svd_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with a (randomized)
!   QLP-SVD algorithm, which fullfills a given relative error in Frobenius norm using
!   subroutine RQLP_SVD_CMP_FIXED_PRECISION in module SVD_Procedures.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror, &
                         norm, unit_matrix, random_seed_, random_number_, gen_random_mat,   &
                         rqlp_svd_cmp_fixed_precision, merror, allocate_error
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn),
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=3000
!   
! relerr0 IS THE REQUESTED RELATIVE ERROR OF THE RESTRICTED SVD IN FROBENIUS NORM,
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: relerr0=0.5_stnd, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqlp_svd_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, relerr, relerr2, tmp, tmp2
    real(stnd), dimension(:,:), allocatable :: a, res, id
    real(stnd), dimension(:),   allocatable :: singval0
!
    real(stnd), dimension(:,:), pointer :: leftvec, rightvec
    real(stnd), dimension(:),   pointer :: singval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, blk_size, nover, i, nsvd, mat_type
!
    logical(lgl) :: random_qr, failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING A (RANDOMIZED)
!               QLP-SVD ALGORITHM, WHICH FULLFILLS A PRESCRIBED TOLERANCE
!               FOR ITS RELATIVE ERROR IN THE FROBENIUS NORM.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 7_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QLP-SVD ALGORITHM.
!
!   DETERMINE IF A RANDOMIZED PARTIAL QR ALGORITHM IS USED IN THE FIRST QR PHASE OF THE QLP-SVD ALGORITHM.
!
    random_qr = true
!
!   DETERMINE THE BLOCK SIZE blk_size FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QLP-SVD ALGORITHM.
!
    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QLP-SVD ALGORITHM.
!
    nover = 10_i4b
!
!   DETERMINE THE NUMBER niter OF QR-QL (E.G. SUBSPACE) ITERATIONS PERFORMED AFTER THE INITIAL QLP
!   FACTORIZATION IN THE QLP-SVD ALGORITHM. BY DEFAULT, NO SUBSPACES ITERATIONS ARE PERFORMED.
!
    niter = 2_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqlp_svd_cmp_fixed_precision COMPUTES AN APPROXIMATE  PARTIAL SINGULAR VALUE DECOMPOSITION (SVD)
!   OF A REAL m-BY-n MATRIX a, WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET
!   TOLERANCE FOR ITS RELATIVE ERROR IN THE FROBENIUS NORM:
!
!            ||a-rSVD||_F <= ||a||_F * relerr
!
!   , WHERE rSVD IS THE COMPUTED PARTIAL SVD APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL SVD
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V'
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   HERE THE RANK OF THE PARTIAL SVD IS NOT KNOWN IN ADVANCE AND THE ARGUMENTS singval,
!   leftvec AND rightvec, WHICH WILL CONTAIN THE SINGULAR VALUES AND THE ASSOCIATED SINGULAR
!   VECTORS IN EXIT OF THE SUBROUTINE MUST BE SPECIFIED AS REAL ARRAY POINTERS.
!   On EXIT, nsvd = size( singval ) IS THE RANK OF THE COMPUTED PARTIAL SVD.
!
!   SET THE TOLERANCE FOR THE RELATIVE ERROR IN FROBENIUS NORM.
!
    relerr = relerr0
!
!   NULLIFY THE POINTERS singval, leftvec AND rightvec SO THAT THEIR STATUT CAN BE CHECKED INSIDE
!   rqlp_svd_cmp_fixed_precision SUBROUTINE.
!
    nullify( singval, leftvec, rightvec )
!
    call rqlp_svd_cmp_fixed_precision( a(:m,:n), relerr, singval, leftvec, rightvec, failure=failure,   &
                                       niter=niter, random_qr=random_qr, blk_size=blk_size, nover=nover )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS, WHICH FULFILLS THE PRESET TOLERANCE SPECIFIED IN ARGUMENT
!   relerr.
!
!   ON EXIT OF rqlp_svd_cmp_fixed_precision :
!
!       relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM OF THE COMPUTED PARTIAL SVD.
!
!       POINTER singval CONTAINS THE nsvd = size(singval) LARGEST SINGULAR VALUES OF a
!       IN DECREASING ORDER.
!
!       POINTER leftvec CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS
!       STORED COLUMNWISE.
!   
!       POINTER rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS
!       STORED COLUMNWISE.
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND THAT
!                         FULL ACCURACY WAS NOT ATTAINED IN THE (RANDOMIZED)
!                         PARTIAL QLP-SVD ALGORITHM WITH niter QR-QL ITERATIONS
!                         FOR THE REQUESTED ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   GET THE RANK OF THE COMPUTED PARTIAL SVD.
!
    nsvd = size( singval )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( nsvd0>nsvd ) then
        relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec(:m,:nsvd)*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        if ( anorm==zero ) then
            anorm = one
        end if
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec(:m,:nsvd)), leftvec(:m,:nsvd) ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS AND POINTERS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr, ' < ', relerr0
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
    write (prtunit,*) 
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rqlp_svd_cmp_fixed_precision() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqlp_svd_cmp_fixed_precision
! ===============================================
!
end program ex1_rqlp_svd_cmp_fixed_precision

ex1_rqr_svd_cmp.F90

program ex1_rqr_svd_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with a (randomized)
!   QR-SVD algorithm using subroutine RQR_SVD_CMP in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror, allocate_error,  &
                         norm, unit_matrix, random_seed_, random_number_, rqr_svd_cmp, gen_random_mat,       &
                         singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn)
! FOR CASES GREATER THAN 0,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=10000, mn=min(m,n), nsvd0=5000, nsvd=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqr_svd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, tmp, tmp2, relerr, relerr2, &
                  abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, leftvec, rightvec, res, id
    real(stnd), dimension(:),   allocatable :: singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: blk_size, nover,  nover_svd, i, mat_type
!
    logical(lgl) :: failure, random_qr, truncated_qr, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING A (RANDOMIZED) PARTIAL
!               QR-SVD DECOMPOSITION.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 8_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QR-SVD ALGORITHM.
!
!   DETERMINE IF A RANDOMIZED PARTIAL QR ALGORITHM IS USED IN THE FIRST QR PHASE OF THE QR-SVD ALGORITHM.
!
    random_qr = true
!
!   DETERMINE IF A RANDOMIZED PARTIAL AND TRUNCATED QR ALGORITHM IS USED IN THE FIRST PHASE OF THE QR-SVD ALGORITHM.
!
    truncated_qr = false
!
!   DETERMINE THE BLOCK SIZE blk_size FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QR-SVD ALGORITHM.
!
!    blk_size = 40_i4b
    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QR-SVD ALGORITHM.
!
    nover = 20_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover_svd FOR THE FINAL SVD PHASE OF THE QR-SVD ALGORITHM.
!
    nover_svd = 20_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval(nsvd), leftvec(m,n), rightvec(n,nsvd),   &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
!   MAKE A COPY OF THE INPUT MATRIX FOR LATER USE.
!
    leftvec(:m,:n) = a(:m,:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqr_svd_cmp COMPUTES AN APPROXIMATE PARTIAL SINGULAR VALUE DECOMPOSITION (SVD)
!   OF A REAL m-BY-n MATRIX a. THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V**(t)
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
    call rqr_svd_cmp( leftvec(:m,:n), singval(:nsvd), failure, rightvec(:n,:nsvd),       &
                      random_qr=random_qr, truncated_qr=truncated_qr, blk_size=blk_size, &
                      nover=nover, nover_svd=nover_svd,  max_francis_steps=10_i4b        )
!
!   THE ROUTINE RETURNS APPROXIMATIONS FOR THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED
!   LEFT AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF rqr_svd_cmp :
!
!       singval CONTAINS THE nsvd LARGEST SINGULAR VALUES OF a IN DECREASING ORDER.
!
!       leftvec(:m,:nsvd) CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!   
!       rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE SVD PHASE OF THE ALGORITHM.
!       failure= true  :  INDICATES THAT THE SVD IN THE SECOND PHASE OF THE ALGORITHM DID NOT
!                         CONVERGE AND THAT FULL ACCURACY WAS NOT ATTAINED IN THE (RANDOMIZED)
!                         PARTIAL QR-SVD ALGORITHM.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/anorm)**2 )
!
    if ( tmp>=zero ) then
!
        relerr = sqrt( tmp )
!
    else
!
        relerr = -one
!
        failure = true
!
    end if
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( mat_type>0_i4b ) then
!
        if ( nsvd0>nsvd ) then
            relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
        else
            relerr2 = zero
        end if
!
!       COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
        err = abs( relerr - relerr2 )
!
    else
!
        err = zero
!
    end if
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            i = min( nsvd, nsvd0 )
!
            id(:i,1_i4b) = singval0(:i)
            id(i+1_i4b:nsvd,1_i4b) = zero
!
            where( id(:nsvd,1_i4b)/=zero )
                res(:nsvd,1_i4b) = id(:nsvd,1_i4b)
            elsewhere
                res(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( singval(:nsvd) - id(:nsvd,1_i4b) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (singval(:nsvd) - id(:nsvd,1_i4b))/res(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:n,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec(:m,:nsvd)*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec(:m,:nsvd)), leftvec(:m,:nsvd) ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr
!
    if ( mat_type>0_i4b ) then
!
        write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                          relerr2
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rqr_svd_cmp() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*)
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqr_svd_cmp
! ==============================
!
end program ex1_rqr_svd_cmp

ex1_rqr_svd_cmp_fixed_precision.F90

program ex1_rqr_svd_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with a (randomized)
!   QR-SVD algorithm, which fullfills a given relative error in Frobenius norm using
!   subroutine RQR_SVD_CMP_FIXED_PRECISION in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 24/03/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror, &
                         norm, unit_matrix, random_seed_, random_number_, gen_random_mat,   &
                         rqr_svd_cmp_fixed_precision, merror, allocate_error
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn).
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=3000
!   
! relerr0 IS THE REQUESTED RELATIVE ERROR OF THE RESTRICTED SVD IN FROBENIUS NORM,
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7.
!   
    real(stnd), parameter  :: relerr0=0.5_stnd, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rqr_svd_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, relerr, relerr2, tmp, tmp2
    real(stnd), dimension(:,:), allocatable :: a, leftvec, res, id
    real(stnd), dimension(:),   allocatable :: singval0
!
    real(stnd), dimension(:,:), pointer :: rightvec
    real(stnd), dimension(:),   pointer :: singval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: blk_size, nover, i, nsvd, mat_type
!
    logical(lgl) :: random_qr, failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING A (RANDOMIZED)
!               QR-SVD ALGORITHM, WHICH FULLFILLS A PRESCRIBED TOLERANCE
!               FOR ITS RELATIVE ERROR IN THE FROBENIUS NORM.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.05_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED QR-SVD ALGORITHM.
!
!   DETERMINE IF A RANDOMIZED PARTIAL QR ALGORITHM IS USED IN THE FIRST QR PHASE OF THE QR-SVD ALGORITHM.
!
    random_qr = true
!
!   DETERMINE THE BLOCK SIZE blk_size FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QR-SVD ALGORITHM.
!
    blk_size = 30_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover FOR THE RANDOMIZED PARTIAL QR PHASE OF THE QR-SVD ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), leftvec(m,n), singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   MAKE A COPY OF THE INPUT MATRIX FOR LATER USE.
!
    leftvec(:m,:n) = a(:m,:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rqr_svd_cmp_fixed_precision COMPUTES AN APPROXIMATE  PARTIAL SINGULAR VALUE DECOMPOSITION (SVD)
!   OF A REAL m-BY-n MATRIX a, WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET
!   TOLERANCE FOR ITS RELATIVE ERROR IN THE FROBENIUS NORM:
!
!            ||a-rSVD||_F <= ||a||_F * relerr
!
!   , WHERE rSVD IS THE COMPUTED PARTIAL SVD APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL SVD
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V'
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   HERE THE RANK OF THE PARTIAL SVD IS NOT KNOWN IN ADVANCE AND THE ARGUMENTS singval
!   AND rightvec, WHICH WILL CONTAIN THE SINGULAR VALUES AND THE ASSOCIATED RIGHT SINGULAR
!   VECTORS IN EXIT OF THE SUBROUTINE MUST BE SPECIFIED AS REAL ARRAY POINTERS.
!   On EXIT, nsvd = size( singval ) IS THE RANK OF THE COMPUTED PARTIAL SVD.
!
!   SET THE TOLERANCE FOR THE RELATIVE ERROR IN FROBENIUS NORM.
!
    relerr = relerr0
!
!   NULLIFY THE POINTERS singval, rightvec SO THAT THEIR STATUT CAN BE CHECKED INSIDE
!   rqr_svd_cmp_fixed_precision SUBROUTINE.
!
    nullify( singval, rightvec )
!
    call rqr_svd_cmp_fixed_precision( leftvec(:m,:n), relerr, singval, failure, rightvec,  &
                                      random_qr=random_qr, blk_size=blk_size, nover=nover, &
                                      max_francis_steps=10_i4b )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS, WHICH FULFILLS THE PRESET TOLERANCE SPECIFIED IN ARGUMENT
!   relerr.
!
!   ON EXIT OF rqr_svd_cmp_fixed_precision :
!
!       relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM OF THE COMPUTED PARTIAL SVD.
!
!       POINTER singval CONTAINS THE nsvd = size(singval) LARGEST SINGULAR VALUES OF a
!       IN DECREASING ORDER.
!
!       ARRAY ARGUMENT leftvec(:m,:nsvd) CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS
!       STORED COLUMNWISE.
!   
!       POINTER rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS
!       STORED COLUMNWISE.
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE SVD IN THE SECOND PHASE OF THE ALGORITHM DID NOT
!                         CONVERGE AND THAT FULL ACCURACY WAS NOT ATTAINED IN THE (RANDOMIZED)
!                         PARTIAL QR-SVD ALGORITHM.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   GET THE RANK OF THE COMPUTED PARTIAL SVD.
!
    nsvd = size( singval )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( nsvd0>nsvd ) then
        relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec(:m,:nsvd)*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        if ( anorm==zero ) then
            anorm = one
        end if
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec(:m,:nsvd)), leftvec(:m,:nsvd) ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS AND POINTERS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr, ' < ', relerr0
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
    write (prtunit,*) 
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rqr_svd_cmp_fixed_precision() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*)
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rqr_svd_cmp_fixed_precision
! ==============================================
!
end program ex1_rqr_svd_cmp_fixed_precision

ex1_rsvd_cmp.F90

program ex1_rsvd_cmp
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate partial SVD with randomized power,
!   subspace or block Krylov iterations using subroutine RSVD_CMP in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 03/06/2022
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, merror, allocate_error,  &
                         norm, unit_matrix, random_seed_, random_number_, rsvd_cmp, gen_random_mat,          &
                         singval_sort
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn)
! FOR CASES GREATER THAN 0,
! nsvd IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit=6, m=10000, n=10000, mn=min(m,n), nsvd0=5000, nsvd=20
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rsvd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, tmp, tmp2, relerr, relerr2, &
                  abs_err, rel_err
    real(stnd), dimension(:,:), allocatable :: a, leftvec, rightvec, res, id
    real(stnd), dimension(:),   allocatable :: singval, singval0
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: niter, nover, i, mat_type
!
    logical(lgl) :: failure, extd_samp, ortho, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING RANDOMIZED
!               POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 8_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED ALGORITHM.
!
!   DETERMINE THE NUMBER OF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS niter TO BE PERFORMED.
!
    niter = 2_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE nover .
!
    nover = 10_i4b
!
!   SPECIFY IF POWER SUBSPACE OR BLOCK KRYLOV ITERATIONS ARE USED.
!
    extd_samp = false
!
!   DETERMINE IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE OR BLOCK KRYLOV ITERATIONS, TO AVOID LOSS
!   OF ACCURACY DUE TO ROUNDING ERRORS.
!
    ortho = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval(nsvd), leftvec(m,nsvd), rightvec(n,nsvd),   &
              singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rsvd_cmp COMPUTES A PARTIAL SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V**(t)
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
    call rsvd_cmp( a(:m,:n), singval(:nsvd), leftvec(:m,:nsvd), rightvec(:n,:nsvd),    &
                   failure=failure, niter=niter, nover=nover, extd_samp=extd_samp,     &
                   ortho=ortho )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF rsvd_cmp :
!
!       singval CONTAINS THE nsvd LARGEST SINGULAR VALUES OF a IN DECREASING ORDER.
!
!       leftvec CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!   
!       rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE;
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RANDOMIZED
!                         SUBSPACE ITERATIONS. THE RESULTS CAN BE STILL USEFUL,
!                         BUT THE APPROXIMATIONS OF SOME OF THE nsvd TOP SINGULAR
!                         TRIPLETS CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE THE ESTIMATED RELATIVE ERROR.
!
    tmp = one - sum( (singval(:nsvd)/anorm)**2 )
!
    if ( tmp>=zero ) then
!
        relerr = sqrt( tmp )
!
    else
!
        relerr = -one
!
        failure = true
!
    end if
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( mat_type>0_i4b ) then
!
        if ( nsvd0>nsvd ) then
            relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
        else
            relerr2 = zero
        end if
!
!       COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
        err = abs( relerr - relerr2 )
!
    else
!
        err = zero
!
    end if
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            i = min( nsvd, nsvd0 )
!
            id(:i,1_i4b) = singval0(:i)
            id(i+1_i4b:nsvd,1_i4b) = zero
!
            where( id(:nsvd,1_i4b)/=zero )
                res(:nsvd,1_i4b) = id(:nsvd,1_i4b)
            elsewhere
                res(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( singval(:nsvd) - id(:nsvd,1_i4b) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (singval(:nsvd) - id(:nsvd,1_i4b))/res(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr
!
    if ( mat_type>0_i4b ) then
!
        write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                          relerr2
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE ( from rsvd_cmp() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rsvd_cmp
! ===========================
!
end program ex1_rsvd_cmp

ex1_rsvd_cmp_fixed_precision.F90

program ex1_rsvd_cmp_fixed_precision
!
!
! Purpose
! =======
!
!   This program illustrates how to compute an approximate reduced SVD with randomized
!   subspace iterations, which fullfills a given relative error in Frobenius norm using
!   subroutine RSVD_CMP_FIXED_PRECISION in module SVD_Procedures.
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, allocate_error,  &
                         merror, norm, unit_matrix, random_seed_, random_number_, gen_random_mat,    &
                         rsvd_cmp_fixed_precision
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn).
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000, mn=min(m,n), nsvd0=3000
!   
! relerr0 IS THE REQUESTED RELATIVE ERROR OF THE RESTRICTED SVD IN FROBENIUS NORM,
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: relerr0=0.5_stnd, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of rsvd_cmp_fixed_precision'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, elapsed_time, anorm, relerr, relerr2, tmp, tmp2
    real(stnd), dimension(:,:), allocatable :: a, res, id
    real(stnd), dimension(:),   allocatable :: singval0
!
    real(stnd), dimension(:,:), pointer :: leftvec, rightvec
    real(stnd), dimension(:),   pointer :: singval
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: blk_size, niter, niter_qb, maxiter_qb, i, nsvd, mat_type
!
    logical(lgl) :: failure_relerr, failure, do_test, ortho, reortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : APPROXIMATE PARTIAL SVD OF A m-BY-n REAL MATRIX USING RANDOMIZED
!               POWER SUBSPACE ITERATIONS. THE RANK OF THE PARTIAL SVD IS NOT KNOWN
!               IN ADVANCE AND IS DETERMINED SUCH THAT THE ASSOCIATED SVD APPROXIMATION
!               FULLFILLS A PRESCRIBED RELATIVE ERROR IN THE FROBENIUS NORM.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET TOLERANCE FOR THE ERROR OF THE RELATIVE ERROR IN FROBENIUS NORM
!   OF THE COMPUTED PARTIAL SVD.
!
    eps = 0.01_stnd
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SINGULAR TRIPLETS.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RANDOMIZED SVD ALGORITHM.
!
!   blk_size*maxiter_qb IS THE MAXIMUM ALLOWABLE RANK OF THE
!   PARTIAL SVD, WHICH IS SOUGHT.
!
    blk_size   = 10_i4b
    maxiter_qb = 20_i4b
!
!   DETERMINE THE NUMBER OF POWER OR SUBSPACE ITERATIONS niter TO BE PERFORMED
!   IN THE FIRST STEP OF THE QB FACTORIZATION.
!
    niter = 1_i4b
!
!   SPECIFY IF ORTHONORMALIZATION IS CARRIED OUT BETWEEN EACH STEP
!   OF THE (POWER) SUBSPACE ITERATIONS, TO AVOID LOSS OF ACCURACY DUE
!   TO ROUNDING ERRORS.
!
    ortho = true
!
!   SPECIFY IF REORTHONORMALIZATION IS CARRIED OUT TO AVOID LOSS OF ORTHOGONALITY
!   IN THE BLOCK GRAM-SCHMIDT ORTHOGONALISATION SCHEME USED TO BUILD THE ORTHONORMAL
!   MATRIX OF THE QB DECOMPOSITION OF THE INPUT MATRIX.
!
    reortho = true
!
!   DETERMINE THE NUMBER OF SUBSPACE ITERATIONS niter_qb TO BE PERFORMED
!   IN THE LAST STEP OF THE QB FACTORIZATION.
!
    niter_qb = 1_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), singval0(nsvd0), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
#ifdef _F2003
    if ( ieee_support_datatype( singval0 ) ) then
!
        if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
            call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
        end if
!
    end if
#endif
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
    anorm = norm( singval0(:nsvd0) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   rsvd_cmp_fixed_precision COMPUTES A PARTIAL SINGULAR VALUE DECOMPOSITION (SVD)
!   OF A REAL m-BY-n MATRIX a, WITH A RANK AS SMALL AS POSSIBLE, BUT WHICH FULFILLS A PRESET
!   TOLERANCE FOR ITS RELATIVE ERROR IN THE FROBENIUS NORM:
!
!            ||A-rSVD||_F <= ||A||_F * relerr
!
!   , WHERE rSVD IS THE COMPUTED PARTIAL SVD APPROXIMATION, || ||_F IS THE FROBENIUS NORM AND
!   relerr IS A PRESCRIBED ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE COMPUTED PARTIAL SVD
!   APPROXIMATION, SPECIFIED IN THE INPUT ARGUMENT relerr.
!
!   THE PARTIAL SVD IS WRITTEN
!
!                       U * S * V**(t)
!
!   WHERE S IS AN nsvd-BY-nsvd MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   DIAGONAL ELEMENTS, U IS AN m-BY-nsvd ORTHONORMAL MATRIX, AND
!   V IS AN n-BY-nsvd ORTHONORMAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE nsvd LARGEST SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   HERE THE RANK OF THE PARTIAL SVD IS NOT KNOWN IN ADVANCE AND THE ARGUMENTS singval, leftvec
!   AND rightvec, WHICH WILL CONTAIN THE SINGULAR TRIPLETS MUST BE SPECIFIED AS REAL ARRAY
!   POINTERS. On EXIT, nsvd = size( singval ) IS THE RANK OF THE COMPUTED PARTIAL SVD.
!
!   SET THE TOLERANCE FOR THE RELATIVE ERROR IN FROBENIUS NORM.
!
    relerr = relerr0
!
!   NULLIFY THE POINTERS singval, leftvec AND rightvec SO THAT THEIR STATUT CAN BE CHECKED INSIDE
!   rsvd_cmp_fixed_precision SUBROUTINE.
!
    nullify( singval, leftvec, rightvec )
!
    call rsvd_cmp_fixed_precision( a(:m,:n), relerr, singval, leftvec, rightvec,          &
                                   failure_relerr=failure_relerr, failure=failure,        &
                                   niter=niter, blk_size=blk_size, maxiter_qb=maxiter_qb, &
                                   ortho=ortho, reortho=reortho, niter_qb=niter_qb,       &
                                   max_francis_steps=10_i4b )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED LEFT
!   AND RIGHT SINGULAR VECTORS, WHICH FULFILLS THE PRESET TOLERANCE SPECIFIED IN ARGUMENT
!   relerr.
!
!   ON EXIT OF rsvd_cmp_fixed_precision :
!
!       relerr CONTAINS THE RELATIVE ERROR IN FROBENIUS NORM OF THE COMPUTED PARTIAL SVD.
!
!       POINTER singval CONTAINS THE nsvd = size(singval) LARGEST SINGULAR VALUES OF a
!       IN DECREASING ORDER.
!
!       POINTER leftvec CONTAINS THE ASSOCIATED nsvd LEFT SINGULAR VECTORS
!       STORED COLUMNWISE.
!   
!       POINTER rightvec CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!
!       failure_relerr = false : INDICATES SUCCESSFUL EXIT AND THE COMPUTED PARTIAL
!       SVD FULFILLS THE REQUESTED RELATIVE ERROR SPECIFIED ON ENTRY IN THE ARGUMENT relerr.
!
!       failure_relerr = true  : INDICATES THAT THE COMPUTED PARTIAL SVD HAS A RELATIVE ERROR
!       LARGER THAN THE REQUESTED RELATIVE ERROR. THIS MEANS THAT THE REQUESTED ACCURACY TOLERANCE
!       FOR THE RELATIVE ERROR IS TOO SMALL (I.E., relerr < 2 * sqrt( epsilon( relerr )/relerr )
!       OR THAT THE INPUT PARAMETERS blk_size AND/OR maxiter_qb HAVE A TOO SMALL VALUE (E.G., THE
!       PRODUCT blk_size*maxiter_qb SETS THE MAXIMUM ALLOWABLE RANK FOR THE PARTIAL SVD, WHICH IS SOUGHT),
!       GIVEN THE DISTRIBUTION OF THE SINGULAR VALUES OF mat, AND MUST BE INCREASED TO FULLFILL THE PRESET
!       ACCURACY TOLERANCE FOR THE RELATIVE ERROR OF THE PARTIAL SVD APPROXIMATION.
!
!       failure = false : INDICATES SUCCESSFUL EXIT AND AN ACCURATE APPROXIMATION OF THE SINGULAR TRIPLETS
!
!       failure = true  : INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RANDOMIZED
!                         SUBSPACE ITERATIONS. THE RESULTS CAN BE STILL USEFUL, ESPECIALLY
!                         IF failure_relerr = false ON EXIT, BUT THE
!                         APPROXIMATIONS OF THE TOP nsvd SINGULAR TRIPLETS
!                         CAN BE POOR.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   GET THE RANK OF THE COMPUTED PARTIAL SVD.
!
    nsvd = size( singval )
!
!   COMPUTE THE TRUE RELATIVE ERROR.
!
    if ( nsvd0>nsvd ) then
        relerr2 = norm( singval0(nsvd+1_i4b:nsvd0)/anorm )
    else
        relerr2 = zero
    end if
!
!   COMPUTE ERROR BETWEEN THE TRUE AND ESTIMATED RELATIVE ERRORS.
!
    err = abs( relerr - relerr2 )
!
!   TEST ACCURACY OF THE COMPUTED SINGULAR TRIPLETS IF REQUIRED.
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(m,nsvd), id(nsvd,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:n,:nsvd) - u(:m,:nsvd)*diag(singval(:nsvd)),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        res(:m,:nsvd) = matmul(a,rightvec) - leftvec*spread(singval(:nsvd),dim=1,ncopies=m)
        id(:nsvd,1_i4b) = norm( res(:m,:nsvd), dim=2_i4b )
!
        if ( anorm==zero ) then
            anorm = one
        end if
!
        err1 = maxval( id(:nsvd,1_i4b) )/( anorm*real(mn,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:m,:nsvd)**(t)*u(:m,:nsvd).
!
        call unit_matrix( id(:nsvd,:nsvd) )
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( res(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:n,:nsvd)**(t)*v(:n,:nsvd).
!
        res(:nsvd,:nsvd) = abs( id(:nsvd,:nsvd) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( res(:nsvd,:nsvd) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, id )
!
    end if
!
!   DEALLOCATE WORK ARRAYS AND POINTERS.
!
    deallocate( a, singval0, singval, leftvec, rightvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the approximate partial SVD                                  = ', &
                      nsvd
    write (prtunit,*) 'Relative error in Frobenius norm     : ||A-rSVD||_F / ||A||_F        = ', &
                      relerr
    write (prtunit,*) 'Best relative error in Frobenius norm: min( ||A-rSVD||_F / ||A||_F ) = ', &
                      relerr2
!
    if ( failure_relerr ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Fail to converge within ', maxiter_qb,   &
                          ' iterations! ||A-rSVD||_F / ||A||_F = ', relerr, ' >= ', relerr0
        write (prtunit,*) 
!
    else
!
        write (prtunit,*) 
        write (prtunit,*) 'Converge within less than ', maxiter_qb ,' iterations! ||A-rSVD||_F / ||A||_F = ', &
                          relerr, ' < ', relerr0
        write (prtunit,*) 
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) 'FAILURE_RELERR ( from rsvd_cmp_fixed_precision() ) = ', failure_relerr
    write (prtunit,*) 'FAILURE        ( from rsvd_cmp_fixed_precision() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing approximate ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_rsvd_cmp_fixed_precision
! ===========================================
!
end program ex1_rsvd_cmp_fixed_precision

ex1_rtsw.F90

program ex1_rtsw
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of functions RTSW and TIME_TO_STRING
!   in module Time_Procedures for computing the elapsed time between two times using the
!   system (wall) clock.
!
!
! LATEST REVISION : 01/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, extd, rtsw, time_to_string
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!
    character(len=*), parameter :: name_proc='Example 1 of rtsw'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(extd)   :: tim1, tim2
!
    integer(i4b)  :: i, j
!
    character(len=13) :: string
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   FUNCTION rtsw CAN BE USED TO COMPUTE THE TIME LAPSE BETWEEN
!   FUNCTIONS CALLS ACCORDING TO THE SYSTEM (WALL) CLOCK.
!
!   FUNCTION rtsw OBTAINS, FROM THE INTRINSIC ROUTINE DATE_AND_TIME,
!   THE CURRENT DATE AND TIME. THESE VALUES ARE THEN CONVERTED TO SECONDS 
!   AND RETURNED AS AN EXTENDED PRECISION REAL VALUE.
!
!   THIS FUNCTION WORKS ACROSS MONTH AND YEAR BOUNDARIES, BUT WILL NOT WORK
!   PROPERLY WITH OPENMP (USE FUNCTION elapsed_time IN THIS CASE).
!
!   SINCE THIS ROUTINE USES THE SYSTEM CLOCK, THE ELAPSED TIME COMPUTED
!   WITH THIS ROUTINE MAY NOT (PROBABLY WON'T BE IN A MULTI-TASKING OS)
!   AN ACCURATE REFLECTION OF THE NUMBER OF CPU CYCLES REQUIRED TO
!   PERFORM A CALCULATION. THEREFORE CARE SHOULD BE EXERCISED WHEN
!   USING THIS TO PROFILE A CODE.
!
!   A TYPICAL USE OF THIS FUNCTION IS AS FOLLOWS :
!
    tim1 = rtsw()
    j    = 0
    do i=1, 1000000000
        j = j + 1
    end do
    tim2 = rtsw()
!
!   CONVERT THE ELAPSED TIME tim2-tim1 TO A STRING FORMAT FOR PRINTING AS
!
!           'hours.minutes.seconds.milliseconds'
!
!   WITH SUBROUTINE time_to_string .
!
    string = time_to_string( tim2-tim1 )
!
!   PRINT THE RESULT.
!
    write (prtunit, *)  " Elapsed Time (s): " // string //  " => hours.minutes.seconds.milliseconds "
!
!
! END OF PROGRAM ex1_rtsw
! =======================
!
end program ex1_rtsw

ex1_select_eigval_cmp.F90

program ex1_select_eigval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real symmetric matrix.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is activated and a
!   rational QR method is used for computing the eigenvalues of the tridiagonal
!   form of the symmetric matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing selected eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, select_eigval_cmp, &
                         trid_inviter, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX AND neig IS THE NUMBER
! OF THE COMPUTED SMALLEST OR LARGEST EIGENVALUES/EIGENVECTORS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE neig ASSOCIATED EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=2000, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of select_eigval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:), allocatable   :: d, res
    real(stnd), dimension(:,:), allocatable :: a, a2, eigvec, d_e
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : neig LARGEST OR SMALLEST EIGENVALUES/EIGENVECTORS OF A n-BY-n REAL
!               SYMMETRIC MATRIX USING A RATIONAL QR ALGORITHM FOR THE EIGENVALUES
!               AND THE INVERSE ITERATION TECHNIQUE FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE neig SMALLEST OR LARGEST EIGENVALUES.
!
    small = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,neig), d(neig), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE FIRST neig LARGEST OR SMALLEST EIGENVALUES OF THE SELF-ADJOINT MATRIX a
!   AND SAVE THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call select_eigval_cmp( a(:n,:n), d(:neig), small, failure, d_e=d_e )
!
!   ON EXIT OF select_eigval_cmp :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RATIONAL
!                         QR ALGORITHM.
!
!   THE COMPUTED EIGENVALUES ARE STORED IN d(:neig) AND
!   THE SYMMETRIC MATRIX IS OVERWRITTEN BY DETAILS OF ITS
!   TRIDIAGONAL DECOMPOSITION STORED IN COMPACT FORM.
!
    if ( .not. failure ) then
!
!       COMPUTE THE ASSOCIATED m EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), d(:neig), eigvec(:n,:neig), failure2,     &
                           mat=a(:n,:n), maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:neig) = matmul( a2(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig) = norm( a(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( a2(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a, eigvec, d_e, d, a2, res )
!
    else
!
        deallocate( a, eigvec, d_e, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_eigval_cmp() ) = ', failure
!
    if ( .not. failure  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()      ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_eigval_cmp
! ====================================
!
end program ex1_select_eigval_cmp

ex1_select_eigval_cmp2.F90

program ex1_select_eigval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP2
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real symmetric matrix whose sum of absolute values exceeds a given (positive) threshold.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is activated and a
!   rational QR method is used for computing the eigenvalues of the tridiagonal
!   form of the symmetric matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing selected eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, c100, allocate_error,  &
                         merror, norm, get_diag, select_eigval_cmp2, trid_inviter,      &
                         norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE SELECTED EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of select_eigval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, val, elapsed_time
    real(stnd), pointer ,    dimension(:)   :: d
    real(stnd), allocatable, dimension(:)   :: res
    real(stnd), allocatable, dimension(:,:) :: eigvec, a, a2, d_e
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: m
!
    logical(lgl) :: failure, failure2, do_test, small
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE LARGEST OR SMALLEST EIGENVALUES OF A n-BY-n REAL
!               SYMMETRIC MATRIX WHOSE SUM OF ABSOLUTE VALUES EXCEEDS
!               A PRESCRIBED THRESHOLD USING A RATIONAL QR ALGORITHM
!               AND SELECTED EIGENVECTORS BY INVERSE ITERATION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE SMALLEST OR LARGEST EIGENVALUES.
!
    small = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SEMI-DEFINITE POSITIVE MATRIX a .
!
    call random_number( a )
!
    a = matmul( a, transpose( a ) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SEMI-DEFINITE POSITIVE MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   DETERMINE THRESHOLD FOR THE SUM OF THE EIGENVALUES.
!
    val = sum( get_diag(a) )/c100
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE LARGEST OR SMALLEST EIGENVALUES OF THE SELF-ADJOINT MATRIX a 
!   WHOSE SUM OF ABSOLUTE VALUES EXCEEDS abs( val ) AND SAVE THE INTERMEDIATE 
!   TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call select_eigval_cmp2( a, d, small, val, failure, d_e=d_e )
!
!   ON EXIT OF select_eigval_cmp2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RATIONAL
!                         QR ALGORITHM.
!
!   THE COMPUTED EIGENVALUES ARE STORED IN d AND THE
!   SYMMETRIC MATRIX IS OVERWRITTEN BY DETAILS OF ITS
!   TRIDIAGONAL DECOMPOSITION STORED IN COMPACT FORM.
!
!   DETERMINE THE NUMBER OF COMPUTED EIGENVALUES AND ALLOCATE WORK ARRAY FOR
!   COMPUTING THE ASSOCIATED EIGENVECTORS.
!
    m = size( d )
!
    if ( .not. failure .and. m>0 ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( eigvec(n,m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE ASSOCIATED m EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), d(:m), eigvec(:n,:m), failure2,     &
                           mat=a(:n,:n), maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. m>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:m) = matmul( a2(:n,:n), eigvec(:n,:m)) - eigvec(:n,:m)*spread( d(:m), dim=1, ncopies=n)
        res(:m) = norm( a(:n,:m), dim=2_i4b )
!
        err1 = maxval( res(:m) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:m,:m) )
!
        a2(:m,:m) = abs( a(:m,:m) - matmul( transpose(eigvec(:n,:m)), eigvec(:n,:m) ) )
!
        err2 = maxval( a2(:m,:m) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a, d_e, d, a2, res )
!
    else
!
        deallocate( a, d_e, d )
!
    end if
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_eigval_cmp2() ) = ', failure
!
    if ( .not. failure  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()       ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. m>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', m, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_eigval_cmp2
! =====================================
!
end program ex1_select_eigval_cmp2

ex1_select_eigval_cmp3.F90

program ex1_select_eigval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP3
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real symmetric matrix.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is activated and a
!   bisection method is used for computing the eigenvalues of the tridiagonal
!   form of the symmetric matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing selected eigenvectors of a real symmetric (tridiagonal) matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, select_eigval_cmp3, &
                         trid_inviter, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! le IS THE NUMBER OF THE WANTED EIGENVALUES/EIGENVECTORS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE le ASSOCIATED EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, le=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of select_eigval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: eigval, res
    real(stnd), dimension(:,:), allocatable :: a, a2, eigvec, d_e
!
    integer(i4b) :: neig
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX USING A BISECTION ALGORITHM FOR THE EIGENVALUES
!               AND THE INVERSE ITERATION TECHNIQUE FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE le SMALLEST OR LARGEST EIGENVALUES.
!
    small = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigval(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE FIRST le EIGENVALUES OF THE SELF-ADJOINT MATRIX a BY BISECTION AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call select_eigval_cmp3( a, neig, eigval, small, failure, sort=sort, le=le, d_e=d_e )
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE ASSOCIATED neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), eigval(:neig), eigvec, failure2,     &
                           mat=a, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE eigval ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        a(:n,:neig) = matmul( a2(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( eigval(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( a(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( a2(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a, d_e, eigval, a2, res )
!
    else
!
        deallocate( a, d_e, eigval )
!
    end if
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_eigval_cmp3() ) = ', failure
!
    if ( .not. failure  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()       ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_eigval_cmp3
! =====================================
!
end program ex1_select_eigval_cmp3

ex1_select_singval_cmp.F90

program ex1_select_singval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP
!   in module SVD_Procedures for computing a full or partial SVD of a real matrix using
!   the Chan-Golub-Reinsch bidiagonal reduction algorithm, the bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, dqds, bisection and inverse iteration
!   algorithms are used.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c1_5, c30, c50, c1_e6, safmin, &
                         bd_inviter2, select_singval_cmp, random_seed_, random_number_, gen_random_mat,      &
                         singval_sort, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! ls IS THE NUMBER OF THE TOP SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS,
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, ls=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, leftvec, rightvec, resid, rla
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, tauq, taup, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, i, mat_type
!
    logical(lgl) :: dqds, failure1, failure2, gen_q, do_test, two_stage
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE CHAN-GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING SINGULAR VECTORS. IF max(n,m)
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR OR QL IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR OR
!   QL FACTORIZATION IS GENERATED AND THE SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( mn, stnd )*c0_9, i4b )
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauq(mn), taup(mn), stat=iok )
        else
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauo(mn), tauq(mn), taup(mn), stat=iok )
        end if
    else
        allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,mn), resid2(mn), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(mn), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:mn) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE GOLUB AND REINSCH
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR OR LQ FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, rla, tauo, tauq AND taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                     ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
        else
!
            call select_singval_cmp( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                     ls=ls, abstol=abstol, tauo=tauo, tauq=tauq, taup=taup, dqds=dqds )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN a, tauq, taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,   &
                                 ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
    end if
!
!   ON EXIT OF select_singval_cmp :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e
!   ARE STORED IN PACKED FORM IN a, tauq, taup AND ALSO rla AND tauo IF A
!   TWO-STAGE ALGORITHM HAS BEEN USED.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VECTORS OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2 :
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, &
                                 failure=failure2, maxiter=maxiter )
!
            else
!
                call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, tauo=tauo, maxiter=maxiter )
!
            end if
!
        else
!
            call bd_inviter2( a, tauq, taup, d, e, s(:nsing), leftvec, rightvec,   &
                              failure=failure2, maxiter=maxiter )
!
        end if
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsing) - u(:n,:nsing)*s(:nsing,:nsing),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        resid2(:nsing) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( resid2(:nsing) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsing)**(t)*u(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, tauq, taup, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, tauq, taup )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( rla )
        else
            deallocate( rla, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_singval_cmp() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter2() )        = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp
! =====================================
!
end program ex1_select_singval_cmp

ex1_select_singval_cmp2.F90

program ex1_select_singval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP2
!   in module SVD_Procedures for computing a full or partial SVD of a real matrix using
!   the Chan-Golub-Reinsch bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, bisection, dqds and inverse iteration
!   algorithms are used.
!
!   Subroutine SELECT_SINGVAL_CMP2 is faster than SELECT_SINGVAL_CMP for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c1_5, c30, c50, c1_e6, safmin, &
                         bd_inviter2, select_singval_cmp2, random_seed_, random_number_, gen_random_mat,     &
                         singval_sort, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!
!
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO mn) FOR CASES GREATER THAN 0,
! ls IS THE NUMBER OF THE TOP SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS,
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, mn=min(m,n), nsvd0=3000, ls=20, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, leftvec, rightvec, resid, rla
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, tauq, taup, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, i, mat_type
!
    logical(lgl) :: failure1, failure2, gen_q, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE CHAN-GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING SINGULAR VECTORS. IF max(n,m)
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR OR QL IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR OR
!   QL FACTORIZATION IS GENERATED AND THE SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( mn, stnd )*c0_9, i4b )
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauq(mn), taup(mn), stat=iok )
        else
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauo(mn), tauq(mn), taup(mn), stat=iok )
        end if
    else
        allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,mn), resid2(mn), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(mn), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:mn) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE GOLUB AND REINSCH
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp2.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp2 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp2 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
!   select_singval_cmp2 IS FASTER THAN select_singval_cmp, BUT MAY BE LESS ACCURATE FOR SOME
!   MATRICES IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR OR LQ FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, rla, tauo, tauq AND taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp2( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
        else
!
            call select_singval_cmp2( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, tauq=tauq, taup=taup,   &
                                      dqds=dqds )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN a, tauq, taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp2( a, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                  ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
    end if
!
!   ON EXIT OF select_singval_cmp2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e
!   ARE STORED IN PACKED FORM IN a, tauq, taup AND ALSO rla AND tauo IF A
!   TWO-STAGE ALGORITHM HAS BEEN USED.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp2.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VALUES OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2 :
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec, &
                                 failure=failure2, maxiter=maxiter )
!
            else
!
                call bd_inviter2( a, tauq, taup, rla, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, tauo=tauo, maxiter=maxiter )
!
            end if
!
        else
!
            call bd_inviter2( a, tauq, taup, d, e, s(:nsing), leftvec, rightvec,   &
                              failure=failure2, maxiter=maxiter )
!
        end if
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsing) - u(:n,:nsing)*s(:nsing,:nsing),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        resid2(:nsing) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( resid2(:nsing) )/ ( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsing)**(t)*u(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( leftvec, rightvec )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( a, rla, s, d, e, tauq, taup )
        else
            deallocate( a, rla, s, d, e, tauo, tauq, taup )
        end if
!
    else
!
        deallocate( a, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_singval_cmp2() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp2
! ======================================
!
end program ex1_select_singval_cmp2

ex1_select_singval_cmp3.F90

program ex1_select_singval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP3
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using a
!   Chan-Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds, and inverse iteration
!   algorithms are used.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c1_5, c30, c50, c1_e6, safmin, &
                         bd_inviter2, select_singval_cmp3, random_seed_, random_number_, gen_random_mat,     &
                         singval_sort, merror, allocate_error, norm, unit_matrix, ifirstloc
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX FOR CASES GREATER THAN 0,
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=15000, m=15000, nsvd0=3000,  &
                               ls=15000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, i, mat_type, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, gen_p, gen_q, reortho, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = true
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF n EXCEEDS mnthr ,
!   A QR FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( m, stnd )*c1_5, i4b )
!
    two_stage = n>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING LEFT SINGULAR VECTORS. IF n
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR
!   FACTORIZATION IS GENERATED AND THE LEFT SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            allocate( a(n,m), ra(m,m), s(m), d(m), e(m), p(m,m), stat=iok )
        else
            allocate( a(n,m), ra(m,m), s(m), d(m), e(m), tauo(m), p(m,m), stat=iok )
        end if
!
    else
!
        allocate( a(n,m), s(m), d(m), e(m), p(m,m), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(m), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:m) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp3.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp3 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp3 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ) IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS THEN REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, ra, tauo AND p .
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp3( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e,  &
                                      ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                      dqds=dqds, failure_bd=failure_bd )
!
        else
!
            call select_singval_cmp3( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, p=p, gen_p=gen_p,      &
                                      reortho=reortho, dqds=dqds, failure_bd=failure_bd )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp3( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                                  ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                  dqds=dqds, failure_bd=failure_bd )
!
    end if
!
!   ON EXIT OF select_singval_cmp3 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM.
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp3.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VALUES OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_inviter2( a, ra, p, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, maxiter=maxiter )
!
            else
!
                call bd_inviter2( a, ra, p, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, tauo=tauo, maxiter=maxiter )
!
            end if
!
        else
!
            call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec,  &
                              failure=failure2, maxiter=maxiter )
!
        end if
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS OR MATRIX MULTIPLICATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( ra )
        else
            deallocate( ra, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp3() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_inviter2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp3
! ======================================
!
end program ex1_select_singval_cmp3

ex1_select_singval_cmp3_bis.F90

program ex1_select_singval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP3
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using the
!   Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and inverse iteration
!   algorithms are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c30, c50, c1_e6, safmin,   &
                         bd_inviter2, select_singval_cmp3, random_seed_, random_number_, gen_random_mat, &
                         singval_sort, merror, allocate_error, norm, unit_matrix, ifirstloc
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX FOR CASES GREATER THAN 0,
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, nsvd0=3000,  &
                               ls=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr_nsing, i, mat_type, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, gen_p, reortho, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(m), d(m), e(m), p(m,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(m), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:m) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp3.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp3 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp3 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ) IF BISECTION IS USED.
!
    call select_singval_cmp3( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                              ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                              dqds=dqds, failure_bd=failure_bd )
!
!   ON EXIT OF select_singval_cmp3 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM.
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp3.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VALUES OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
        call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec,  &
                          failure=failure2, maxiter=maxiter )
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM.
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE.
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS OR MATRIX MULTIPLICATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp3() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_inviter2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp3
! ======================================
!
end program ex1_select_singval_cmp3

ex1_select_singval_cmp4.F90

program ex1_select_singval_cmp4
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP4
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using a
!   Chan-Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and inverse iteration
!   algorithms are used.
!                                                     
!   Subroutine SELECT_SINGVAL_CMP4 is faster than SELECT_SINGVAL_CMP3 for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c1_5, c30, c50, c1_e6, safmin, &
                         bd_inviter2, select_singval_cmp4, random_seed_, random_number_, gen_random_mat,     &
                         singval_sort, merror, allocate_error, norm, unit_matrix, ifirstloc
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX FOR CASES GREATER THAN 0,
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, nsvd0=3000,  &
                               ls=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp4'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, i, mat_type, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, gen_p, gen_q, reortho, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF n EXCEEDS mnthr ,
!   A QR FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( m, stnd )*c1_5, i4b )
!
    two_stage = n>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING LEFT SINGULAR VECTORS. IF n
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR
!   FACTORIZATION IS GENERATED AND THE LEFT SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), ra(m,m), s(m), d(m), e(m), p(m,m), stat=iok )
        else
            allocate( a(n,m), ra(m,m), s(m), d(m), e(m), tauo(m), p(m,m), stat=iok )
        end if
    else
        allocate( a(n,m), s(m), d(m), e(m), p(m,m), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(m), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:m) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp4.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp4 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp4 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
!   select_singval_cmp4 IS FASTER THAN select_singval_cmp3, BUT MAY BE LESS ACCURATE
!   FOR SOME MATRICES IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, ra, tauo AND p.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp4( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e,  &
                                      ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                      dqds=dqds, failure_bd=failure_bd )
!
        else
!
            call select_singval_cmp4( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, p=p, gen_p=gen_p,      &
                                      reortho=reortho, dqds=dqds, failure_bd=failure_bd )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp4( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                                  ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                  dqds=dqds, failure_bd=failure_bd )
!
    end if
!
!   ON EXIT OF select_singval_cmp4 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp4.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VALUES OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2 :
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_inviter2( a, ra, p, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, maxiter=maxiter   )
!
            else
!
                call bd_inviter2( a, ra, p, d, e, s(:nsing), leftvec, rightvec,  &
                                  failure=failure2, tauo=tauo, maxiter=maxiter   )
!
            end if
!
        else
!
            call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec,  &
                              failure=failure2, maxiter=maxiter )
!
        end if
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE.
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS OR MATRIX MULTIPLICATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( ra )
        else
            deallocate( ra, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp4() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp4() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_inviter2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp4
! ======================================
!
end program ex1_select_singval_cmp4

ex1_select_singval_cmp4_bis.F90

program ex1_select_singval_cmp4
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP4
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using the
!   Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and the inverse iteration method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and inverse iteration
!   algorithms are used.
!
!   Subroutine SELECT_SINGVAL_CMP4 is faster than SELECT_SINGVAL_CMP3 for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c0_9, c30, c50, c1_e6, safmin,   &
                         bd_inviter2, select_singval_cmp4, random_seed_, random_number_, gen_random_mat, &
                         singval_sort, merror, allocate_error, norm, unit_matrix, ifirstloc
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX FOR CASES GREATER THAN 0,
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, nsvd0=3000,  &
                               ls=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of select_singval_cmp4'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, abstol, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, s0, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr_nsing, i, mat_type, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, gen_p, reortho, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(m), d(m), e(m), p(m,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp -c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(m), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:m) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp4.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp4 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp4 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ) IF BISECTION IS USED.
!
!   select_singval_cmp4 IS FASTER THAN select_singval_cmp3, BUT MAY BE LESS ACCURATE
!   FOR SOME MATRICES IF BISECTION IS USED.
!
    call select_singval_cmp4( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                              ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                              dqds=dqds, failure_bd=failure_bd )
!
!   ON EXIT OF select_singval_cmp4 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_inviter2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp4.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE THE nsing ASSOCIATED SINGULAR VALUES OF a BY INVERSE ITERATION
!   WITH SUBROUTINE bd_inviter2 :
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
!       ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!       THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
        call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec,  &
                          failure=failure2, maxiter=maxiter          )
!
!       ON EXIT OF bd_inviter2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE INVERSE ITERATION ALGORITHM
!       failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE.
!
!       THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!       FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!       THE MODIFIED GRAM-SCHMIDT ALGORITHM IF THE SINGULAR VALUES ARE NOT WELL SEPARATED.
!       THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK TRANSFORMATIONS OR MATRIX MULTIPLICATIONS.
!       ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a, RESPECTIVELY.
!
!       bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
            deallocate( a2, resid, resid2, s0 )
        else
            deallocate( a2, resid, resid2 )
        end if
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp4() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp4() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_inviter2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_select_singval_cmp4
! ======================================
!
end program ex1_select_singval_cmp4

ex1_singvalues.F90

program ex1_singvalues
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function SINGVALUES 
!   in module SVD_Procedures to compute all the singular values of a real
!   matrix.
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, svd_cmp, singvalues
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED  MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=1000, n=100
!   
!   
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                 :: err
    real(stnd), dimension(n)   :: s, s2
    real(stnd), dimension(m,n) :: a, u
    real(stnd), dimension(n,n) :: v
!
    logical(lgl)   :: failure
!   
    character      :: sort='a'
!   
!
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of singvalues'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
!   SAVE RANDOM DATA MATRIX FOR LATER USE.
!
    u(:m,:n) = a(:m,:n)
!
!   COMPUTE FULL SVD OF RANDOM DATA MATRIX.
!
    call svd_cmp( u, s, failure, v, sort=sort )
!
!   CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:,:k) - u(:,:k)*s.
!
    err =  sum( abs(matmul(a,v) - u*spread(s,dim=1,ncopies=m)) )/sum( abs(s) )
!
    if ( err<=sqrt(epsilon(err)) .and. .not.failure ) then
!
!       COMPUTE ONLY SINGULAR VALUES OF RANDOM DATA MATRIX.
!
        s2 = singvalues( a, sort=sort )
!
!       CHECK THE RESULTS WITH THE PREVIOUS COMPUTATIONS.
!
        if ( sum(abs(s2-s))<=sqrt(epsilon(err))*maxval(abs(s)) ) then
            write (prtunit,*) name_proc//' is correct'
        else
            write (prtunit,*) name_proc//' is incorrect'
        end if
    else
        write (prtunit,*) name_proc//' not done'
    end if
!
!
! END OF PROGRAM ex1_singvalues
! =============================
!
end program ex1_singvalues

ex1_solve_lin.F90

program ex1_solve_lin
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of fonction SOLVE_LIN
!   in module Lin_Procedures for solving a real linear system with one right
!   hand side.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, half, safmin, true, false, solve_lin, &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED.
!
    integer(i4b), parameter :: prtunit=6, n=4000
!
    character(len=*), parameter :: name_proc='Example 1 of solve_lin'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a
    real(stnd), dimension(:), allocatable   :: b, x, x2, res
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND ONE RIGHT HAND-SIDE WITH THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n), x(n), x2(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-ELEMENT RANDOM SOLUTION VECTOR x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-VECTOR PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION VECTOR FOR LINEAR SYSTEM
!
!            a*x = b
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a WITH FUNCTION solve_lin.
!   ARGUMENTS a AND b ARE NOT MODIFIED BY THE FUNCTION.
!
    x2 = solve_lin( a, b )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n) = x2(:n) - x(:n)
        err     = norm(res)/norm(x)
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, x2 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
!
        write (prtunit,*) name_proc//' is correct'
!
        if ( do_test ) then
!
            write (prtunit,*) 
!
!           PRINT RELATIVE ERROR OF COMPUTED SOLUTION.
!
            write (prtunit,*) 'Relative error of the computed solution = ', err
!
        end if
!
    else
!
        write (prtunit,*) name_proc//' is incorrect'
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the solution of a linear real system of size ', &
       n, ' by ', n, ' is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_solve_lin
! ============================
!
end program ex1_solve_lin

ex1_solve_llsq.F90

program ex1_solve_llsq
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of fonction SOLVE_LLSQ
!   in module LLSQ_Procedures for solving a real linear least-squares problem
!   with one right hand side.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, &
                         merror, solve_llsq
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX AND
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=1000, n=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of solve_llsq'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: x, res, b
    real(stnd), allocatable, dimension(:,:) :: a
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, min_norm
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SOLVING A LINEAR LEAST SQUARES SYSTEM WITH ONE RIGHT HAND-SIDE:
!
!                              a(:m,:n)*x(:n) ≈ b(:m)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = real( m, stnd)*epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE LLSQ ALGORITHM.
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION VECTOR x FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a . a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
    x(:n) = solve_llsq( a(:m,:n), b(:m), krank=krank, tol=tol, min_norm=min_norm )
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a .
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true . OTHERWISE, SOLUTION(S) ARE COMPUTED SUCH THAT IF THE jTH COLUMN OF a
!   IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF COEFFICIENT MATRIX a .
!
        res(:m) = b(:m) - matmul( a(:m,:n), x(:n) )
!
        err = maxval( abs( matmul( res(:m), a(:m,:n) ) ) )/ sum( abs(a(:m,:n)) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_solve_llsq
! =============================
!
end program ex1_solve_llsq

ex1_svd_cmp.F90

program ex1_svd_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP 
!   in module SVD_Procedures for computing a full SVD decomposition of a real m-by-n matrix
!   by variants of the Golub-Reinsch bidiagonal reduction algorithm and the Golub-Reinsch
!   bidiagonal QR implicit shift method.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   Golub-Reinsch bidiagonal reduction and bidiagonal QR algorithms are used.
!
!   SVD_CMP is one of the most robust algorithms available in statpack (with SVD_CMP5)
!   for computing the SVD of a real matrix and performs well the number of available processors
!   is big. If a small set of processors is available, SVD_CMP5 is usually faster.
!   
!
! LATEST REVISION : 07/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp, norm, unit_matrix, random_seed_, random_number_,        &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, ulp, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0
    real(stnd), allocatable, dimension(:,:) :: a, a2, v, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, perfect_shift, bisect, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL MATRIX USING THE BIDIAGONAL QR IMPLICIT METHOD WITH
!               A WAVE-FRONT ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE BIDIAGONAL
!               QR ALGORITHM AND, OPTIONALLY, A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    bisect = false
!
!   SPECIFY IF DQDS IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    dqds = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), v(n,k), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), resid(m,k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(k), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   BY DEFAULT, svd_cmp USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call svd_cmp( a, s, failure, v=v, sort=sort, perfect_shift=perfect_shift, &
                  bisect=bisect, dqds=dqds )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES AND THE FIRST min(m,n) LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF svd_cmp :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a .
!
!       a IS OVERWRITTEN WITH THE FIRST min(m,n) LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!   
!       v CONTAINS THE FIRST min(m,n) RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!                
!   IF THE PARAMETER v IS ABSENT, svd_cmp COMPUTES ONLY THE SINGULAR VALUES OF a             
!   AND, OPTIONALLY, STORES THE INTERMEDIATE BIDIAGONAL FORM OF a AND THE ORTHOGONAL
!   MATRICES USED TO REDUCE a TO BIDIAGONAL FORM. SEE EXAMPLES ex2_svd_cmp.F90 OR 
!   ex1_bd_inviter2.F90, WHICH SHOW HOW TO COMPUTE A PARTIAL SVD, FOR MORE DETAILS.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:k)/=zero )
                resid(:k,1_i4b) = s0(:k)
            elsewhere
                resid(:k,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:k) - s0(:k))/resid(:k,1_i4b) ) )
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( s0 )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:,:k) - U(:,:k)*S(:k,:k).
!
        resid(:m,:k) = matmul(a2,v) - a(:,:k)*spread(s,dim=1,ncopies=m)
        a2(:k,1_i4b) = norm( resid(:m,:k), dim=2_i4b )
        err1 =  maxval( a2(:k,1_i4b) )/( anorm*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:k,:k) )
!
        resid(:k,:k) = abs( a2(:k,:k) - matmul( transpose(a(:m,:k)), a(:m,:k) ) )
        err2 = maxval( resid(:k,:k) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
        resid(:k,:k) = abs( a2(:k,:k) - matmul( transpose(v(:n,:k)), v(:n,:k) ) )
        err3 = maxval( resid(:k,:k) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, v, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from svd_cmp() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp
! ==========================
!
end program ex1_svd_cmp

ex1_svd_cmp2.F90

program ex1_svd_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP2
!   in module SVD_Procedures for computing a full SVD decomposition of a real m-by-n matrix
!   by variants of the Golub-Reinsch bidiagonal reduction algorithm and the Golub-Reinsch
!   bidiagonal QR implicit shift method.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   Golub-Reinsch bidiagonal reduction and bidiagonal QR algorithms are used.
!   The singular vectors are output in LAPACK-style format instead of columnwise
!   as in SVD_CMP subroutine. This is the main difference between SVD_CMP and SVD_CMP2
!   subroutines.                                                     
!
!
! LATEST REVISION : 07/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp2, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0
    real(stnd), allocatable, dimension(:,:) :: a2, a, c, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl)   :: failure, perfect_shift, bisect, dqds, do_test
!   
    character      :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL MATRIX USING THE BIDIAGONAL QR IMPLICIT METHOD WITH
!               A WAVE-FRONT ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE BIDIAGONAL
!               QR ALGORITHM AND, OPTIONALLY, A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS.
!               THE SINGULAR VECTORS ARE OUTPUT IN LAPACK-STYLE FORMAT INSTEAD OF COLUMNWISE.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    bisect = false
!
!   SPECIFY IF DQDS IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    dqds = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), c(k,k), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), resid(m,k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(k), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp2 COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   v IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   BY DEFAULT, svd_cmp2 USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call svd_cmp2( a, s, failure, u_vt=c, sort=sort, perfect_shift=perfect_shift, &
                   bisect=bisect, dqds=dqds )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES, THE LEFT AND RIGHT
!   SINGULAR VECTORS. THE RIGHT SINGULAR VECTORS ARE RETURNED ROWWISE.
!
!   ON EXIT OF svd_cmp2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!       IF m>=n,    a IS OVERWRITTEN WITH THE FIRST min(m,n)
!                   COLUMNS OF U (THE LEFT SINGULAR VECTORS
!                   STORED COLUMNWISE);
!                   c CONTAINS THE n-BY-n ORTHOGONAL MATRIX V**(t)
!                   (THE RIGHT SINGULAR VECTORS STORED ROWWISE).
!
!       IF m<n,     a IS OVERWRITTEN WITH THE FIRST min(m,n)
!                   ROWS OF V**(t) (THE RIGHT SINGULAR VECTORS
!                   STORED ROWWISE);
!                   c CONTAINS THE m-BY-m ORTHOGONAL MATRIX U
!                   (THE LEFT SINGULAR VECTORS STORED COLUMNWISE).
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!                
!   IF THE PARAMETER u_vt IS ABSENT, svd_cmp2 COMPUTES ONLY THE SINGULAR VALUES OF a             
!   AND, OPTIONALLY, STORES THE INTERMEDIATE BIDIAGONAL FORM OF a AND THE ORTHOGONAL
!   MATRICES USED TO REDUCE a TO BIDIAGONAL FORM. SEE EXAMPLE ex2_svd_cmp2.F90, WHICH
!   SHOWS HOW TO COMPUTE A PARTIAL SVD, FOR MORE DETAILS.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:k)/=zero )
                resid(:k,1_i4b) = s0(:k)
            elsewhere
                resid(:k,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:k) - s0(:k))/resid(:k,1_i4b) ) )
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( s0 )
!
        end if
!
        if ( m>=n ) then
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*V(:n,:k) - U(:m,:k)*S(:k,:k).
!
            resid(:m,:k) = matmul(a2(:m,:k), transpose(c(:k,:k)) ) - a(:m,:k)*spread(s,dim=1,ncopies=m)
            a2(:k,1_i4b) = norm( resid(:m,:k), dim=2_i4b )
            err1 =  maxval( a2(:k,1_i4b) )/( anorm*real(m,stnd) )
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
            call unit_matrix( a2(:n,:n) )
!
            resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:m,:n)), a(:m,:n) ) )
            err2 = maxval( resid(:n,:n) )/real(m,stnd)
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
            resid(:n,:n) = abs( a2(:n,:n) - matmul( c(:n,:n), transpose(c(:n,:n)) ) )
            err3 = maxval( resid(:n,:n) )/real(n,stnd)
!
        else
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*V(:n,:k) - U(:m,:k)*S(:k,:k).
!
            resid(:m,:k) =  matmul(a2(:m,:n),transpose(a(:k,:n))) - c(:k,:k)*spread(s,dim=1,ncopies=k)
            a2(:k,1_i4b) = norm( resid(:m,:k), dim=2_i4b )
            err1 =  maxval( a2(:k,1_i4b) )/( anorm*real(m,stnd) )
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
            call unit_matrix( a2(:m,:m) )
!
            resid(:m,:m) = abs( a2(:m,:m) - matmul( transpose(c(:m,:m )), c(:m,:m ) ) )
            err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
            resid(:m,:m) = abs( a2(:m,:m) - matmul( a(:m,:n), transpose(a(:m,:n)) ) )
            err3 = maxval( resid(:m,:m) )/real(n,stnd)
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, c, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from svd_cmp2() ) = ', failure
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp2
! ===========================
!
end program ex1_svd_cmp2

ex1_svd_cmp3.F90

program ex1_svd_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP3 
!   in module SVD_Procedures for computing a full SVD decomposition of a real m-by-n matrix by
!   a bidiagonal factorization and the Golub-Reinsch bidiagonal QR implicit method.
!
!   The computations are parallelized if OpenMP is used and the fast one-sided Rhala-Barlow algorithm
!   is used in the first bidiagonalization phase. Furthermore, an highly efficient variant of the
!   Golub-Reinsch bidiagonal QR implicit algorithm is used.
!
!   In many cases SVD_CMP3 can be faster than SVD_CMP or SVD_CMP2 (especially when the number of available
!   processors is reduced) at the expense of a slight loss of orthogonality for the smaller left
!   (if m>=n) or right (if m<n) singular vectors.
!
!
! LATEST REVISION : 07/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6, &
                         svd_cmp3, norm, unit_matrix, random_seed_, random_number_,      &
                         gen_random_mat, singval_sort, merror, allocate_error,           &
                         ifirstloc, safmin
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=1000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0
    real(stnd), allocatable, dimension(:,:) :: a2, a, c, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type, nvec
!
    logical(lgl) :: reortho, perfect_shift, bisect, dqds, failure, failure_bd, do_test
!   
    character:: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL MATRIX USING THE ONE-SIDED
!               RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM AND
!               THE BIDIAGONAL QR IMPLICIT METHOD WITH A WAVE-FRONT
!               ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE
!               BIDIAGONAL QR ALGORITHM AND, OPTIONALLY, A PERFECT
!               SHIFT STRATEGY FOR THE SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW ALGORITHM.
!
    reortho = true
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    bisect = false
!
!   SPECIFY IF DQDS IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    dqds = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), c(k,k), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), resid(m,k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(k), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp3 COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   v IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   BY DEFAULT, svd_cmp3 USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call svd_cmp3( a, s, failure, c, sort=sort, perfect_shift=perfect_shift, bisect=bisect, &
                   dqds=dqds, reortho=reortho, failure_bd=failure_bd )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES AND THE FIRST min(m,n) LEFT AND
!   RIGHT SINGULAR VECTORS. THE RIGHT SINGULAR VECTORS ARE RETURNED ROWWISE
!   IF m<n.
!
!   ON EXIT OF svd_cmp3 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!       failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!       failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                            CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                            OF a FOR THE LAST LEFT (IF m>=n) OR RIGHT (IF m<n) SINGULAR VECTORS.
!
!       IF m>=n,    a IS OVERWRITTEN WITH THE FIRST n
!                   COLUMNS OF U (THE LEFT SINGULAR VECTORS,
!                   STORED COLUMNWISE);
!                   c CONTAINS THE n-BY-n ORTHOGONAL MATRIX V .
!
!       IF m<n,     a IS OVERWRITTEN WITH THE FIRST m ROWS OF
!                   V**(t) (THE RIGHT SINGULAR VECTORS,
!                   STORED ROWWISE);
!                   c CONTAINS THE m-BY-m ORTHOGONAL MATRIX U.
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:k)/=zero )
                resid(:k,1_i4b) = s0(:k)
            elsewhere
                resid(:k,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:k) - s0(:k))/resid(:k,1_i4b) ) )
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( s0 )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s<=tmp, lgl ) ) - 1_i4b
!
        if ( m>=n ) then
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*V(:n,:nvec) - U(:m,:nvec)*S(:nvec,:nvec).
!
            resid(:m,:nvec) = matmul(a2(:m,:k), c(:k,:nvec) ) - a(:m,:nvec)*spread(s(:nvec),dim=1,ncopies=m)
            a2(:nvec,1_i4b) = norm( resid(:m,:nvec), dim=2_i4b )
            err1 =  maxval( a2(:nvec,1_i4b) )/( anorm*real(m,stnd) )
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
            call unit_matrix( a2(:n,:n) )
!
            resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(a(:m,:nvec)), a(:m,:nvec) ) )
            err2 = maxval( resid(:nvec,:nvec) )/real(m,stnd)
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
            resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(c(:n,:n)), c(:n,:n) ) )
            err3 = maxval( resid(:n,:n) )/real(n,stnd)
!
        else
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a(:m,:n)*V(:n,:nvec) - U(:m,:nvec)*S(:nvec,:nvec).
!
            resid(:m,:nvec) =  matmul(a2(:m,:n),transpose(a(:nvec,:n))) - c(:k,:nvec)*spread(s(:nvec),dim=1,ncopies=k)
            a2(:nvec,1_i4b) = norm( resid(:m,:nvec), dim=2_i4b )
            err1 =  maxval( a2(:nvec,1_i4b) )/( anorm*real(m,stnd) )
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
            call unit_matrix( a2(:m,:m) )
!
            resid(:m,:m) = abs( a2(:m,:m) - matmul( transpose(c(:m,:m )), c(:m,:m ) ) )
            err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
            resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( a(:nvec,:n), transpose(a(:nvec,:n)) ) )
            err3 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, c, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Effective rank of the matrix                         = ', nvec
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from svd_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from svd_cmp3() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp3
! ===========================
!
end program ex1_svd_cmp3

ex1_svd_cmp4.F90

program ex1_svd_cmp4
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP4 in module SVD_Procedures
!   for computing all singular values and, optionally, singular vectors of a real n-by-m matrix with n>=m
!   by a bidiagonal factorization and the Golub-Reinsch bidiagonal QR implicit method.
!
!   The computations are parallelized if OpenMP is used and the fast one-sided Rhala-Barlow algorithm
!   is used in the first bidiagonalization phase. Furthermore, an highly efficient variant of the
!   Golub-Reinsch bidiagonal QR implicit algorithm is used.
!
!   In many cases SVD_CMP4 can be faster than SVD_CMP or SVD_CMP2 (especially when the number of available
!   processors is reduced) at the expense of a slight loss of orthogonality for the smaller left
!   singular vectors.
!
!   The main difference betwen SVD_CMP4 and SVD_CMP3 subroutines is that SVD_CMP4 allows the user
!   to compute only the singular values of the input matrix and to output both the bidiagonal form
!   of the input matrix and the associated orthogonal matrices in the bidiagonal decomposition of
!   this matrix. This allows the user to compute selected singular vectors in a second step with
!   other statpack subroutines like BD_INVITER2 or BD_DEFLATE2 subroutines. Another difference
!   is that, in SVD_CMP4, the input matrix must have more rows than columns.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 07/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, one, seven, c30, c50, c1_e6,   &
                         bd_inviter2, svd_cmp4, norm, unit_matrix, random_seed_, random_number_, &
                         gen_random_mat, singval_sort, merror, allocate_error, ifirstloc, safmin
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX FOR CASES GREATER THAN 0,
! nsing IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, n=3000, m=3000, nsvd0=1000,    &
                               nsing=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7.
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp4'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0, d, e
    real(stnd), allocatable, dimension(:,:) :: a, a2, p, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type, mnthr_nsing, nrank, nvec
!
    logical(lgl) :: failure_bd, failure_bd_svd, failure_bd_inviter, do_test, gen_p, reortho
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : ALL SINGULAR VALUES AND nsing SINGULAR VECTORS OF A n-BY-m REAL MATRIX WITH n>=m
!               USING THE RHALA-BARLOW ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, THE DQDS
!               ALGORITHM FOR THE SINGULAR VALUES AND THE INVERSE ITERATION METHOD FOR THE
!               SINGULAR VECTORS (e.g., A PARTIAL SVD DECOMPOSITION).
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW ALGORITHM.
!
    reortho = true
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   nsing EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = nsing>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), leftvec(n,nsing), rightvec(m,nsing),  &
              p(m,m), s(m), d(m), e(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:m) = a(:n,:m)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(nsing), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            if ( nsvd0<nsing ) then
                s0(:nsvd0) = s(:nsvd0)
                s0(nsvd0+1_i4b:nsing) = zero
            else
                s0(:nsing) = s(:nsing)
            end if
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a .
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF a AND
!   ONLY nsing LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION
!   OF a) IN TWO STEPS:
!
!   STEP1 : COMPUTE ALL SINGULAR VALUES OF a AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE svd_cmp4.
!
    call svd_cmp4( a, s, failure=failure_bd_svd, v=p, sort=sort, d=d, e=e, sing_vec=false, &
                   gen_p=gen_p, reortho=reortho, failure_bd=failure_bd  )
!
!   ON EXIT OF svd_cmp4 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!       failure_bd= false :  INDICATES SUCCESSFUL EXIT
!       failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                            CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                            OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   HERE, SORT = 'd' IS USED. THIS IS REQUIRED FOR THE USE OF bd_inviter2 LATER.
!                
!   IF THE PARAMETER sing_vec IS USED WITH THE VALUE false IN THE CALL OF svd_cmp4,
!   svd_cmp4 COMPUTES ONLY THE SINGULAR VALUES OF a AND, OPTIONALLY, STORES THE
!   INTERMEDIATE BIDIAGONAL FORM OF a AND THE ORTHOGONAL MATRICES USED TO REDUCE
!   a TO BIDIAGONAL FORM.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN THE OPTIONAL ARGUMENTS d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d AND e ARE STORED 
!   IN mat AND p ON EXIT.
!
!   STEP2 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION
!   WITH SUBROUTINE bd_inviter2 .
!
!   ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!   THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec, failure=failure_bd_inviter, &
                      maxiter=maxiter )
!
!   ON EXIT OF bd_inviter2 :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS.
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF NECESSARY. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED 
!   BY BACK TRANSFORMATIONS. ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT
!   SINGULAR VECTORS OF a, RESPECTIVELY.
!
!   NOTE THAT bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:nsing)/=zero )
                resid(:nsing,1_i4b) = s0(:nsing)
            elsewhere
                resid(:nsing,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsing) - s0(:nsing) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsing) - s0(:nsing))/resid(:nsing,1_i4b) ) )
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( s0 )
!
        end if
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nrank = ifirstloc( logical( s<=tmp, lgl ) ) - 1_i4b
        nvec = min( nrank, nsing )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nvec) - U(:n,:nvec)*S(:nvec,:nvec),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        a2(:nvec,1_i4b) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( a2(:nvec,1_i4b) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nvec)**(t)*U(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, leftvec, rightvec, s, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure_bd_inviter ) then
!    if ( err<=eps .and. .not.failure_bd_svd .and. .not.failure_bd_inviter ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Number of requested singular triplets                = ', nsing
        write (prtunit,*) 'Effective rank of the matrix                         = ', nrank
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from svd_cmp4() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from svd_cmp4() ) = ', failure_bd_svd
    write (prtunit,*) ' FAILURE ( from bd_inviter2() ) = ', failure_bd_inviter
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp4
! ===========================
!
end program ex1_svd_cmp4

ex1_svd_cmp5.F90

program ex1_svd_cmp5
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP5 in module SVD_Procedures
!   for computing all singular values and, optionally, singular vectors of a real matrix by
!   a bidiagonal factorization and the Golub-Reinsch bidiagonal QR implicit method.
!
!   The computations are parallelized if OpenMP is used and the one-sided Rhala-Barlow algorithm
!   is used in the first bidiagonalization phase. Furthermore, an highly efficient variant of the
!   Golub-Reinsch bidiagonal QR implicit algorithm is used.                                       
!
!   Finally, SVD_CMP5 corrects fully the possible loss of orthogonality induced by the one-sided
!   Ralha-Barlow bidiagonal reduction algorithm if the input matrix is nearly singular contrary
!   to SVD_CMP3 and SVD_CMP4 subroutines.
!
!   SVD_CMP5 is one of the most robust algorithms available in statpack (with SVD_CMP or SVD_CM2)
!   for computing the SVD of a real matrix and performs well the number of available processors
!   is not to big. If a large set of processors is available, SVD_CMP or SVD_CM2 are usually faster.
! 
!
! LATEST REVISION : 07/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp5, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp5'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, ulp, &
                                               abs_err, rel_err, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0
    real(stnd), allocatable, dimension(:,:) :: a, a2, v, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, failure_bd, perfect_shift, bisect, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL SVD OF A REAL MATRIX USING THE ONE-SIDED
!               RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM AND
!               THE BIDIAGONAL QR IMPLICIT METHOD WITH A WAVE-FRONT
!               ALGORITHM FOR APPLYING GIVENS ROTATIONS IN THE
!               BIDIAGONAL QR ALGORITHM AND, OPTIONALLY, A PERFECT
!               SHIFT STRATEGY FOR THE SINGULAR VECTORS. A FINAL BACK
!               TRANSFORMATION AND REORTOGONALIZATION STEP IS ALSO
!               PERFORMED TO CORRECT FOR THE POSSIBLE LOSS OF ORTHOGONALITY
!               INDUCED BY THE ONE-SIDED RALHA-BARLOW BIDIAGONAL
!               REDUCTION ALGORITHM IF THE INPUT MATRIX IS NEARLY SINGULAR.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF A PERFECT SHIFT STRATEGY MUST BE USED
!   IN THE BIDIAGONAL SVD ALGORITHM.
!
    perfect_shift = true
!
!   SPECIFY IF BISECTION IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    bisect = false
!
!   SPECIFY IF DQDS IS USED FOR THE PERFECT SHIFT STRATEGY
!   IN THE BIDIAGONAL SVD ALGORITHM TO GIVE HIGHER ACCURACY.
!
    dqds = false
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), v(n,k), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( s(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), resid(m,k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
!           ALLOCATE WORK ARRAY.
!
            allocate( s0(k), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           SAVE THE TRUE SINGULAR VALUES.
!
            s0(:nsvd0) = s(:nsvd0)
            s0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp5 COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   BY DEFAULT, svd_cmp5 USES A PERFECT SHIFT STRATEGY FOR THE SINGULAR VECTORS
!   SINCE IT IS USUALLY FASTER.
!   IF YOU DONT WANT TO USE THIS STRATEGY, YOU CAN SPECIFY THE OPTIONAL LOGICAL
!   PARAMETER perfect_shift WITH THE VALUE false.
!
    call svd_cmp5( a, s, failure, v, sort=sort, perfect_shift=perfect_shift, &
                   bisect=bisect, dqds=dqds, failure_bd=failure_bd )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES AND THE FIRST min(m,n) LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF svd_cmp5 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!       failure_bd= false :  INDICATES SUCCESSFUL EXIT AND THAT MAXIMUM ACCURACY WAS OBTAINED
!                            IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION OF a .
!       failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR.
!
!       a IS OVERWRITTEN WITH THE FIRST min(m,n) LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!   
!       v CONTAINS THE FIRST min(m,n) RIGHT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   BY DEFAULT, THE SINGULAR VALUES ARE SORTED INTO INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!                
!   IF THE PARAMETER v IS ABSENT, svd_cmp5 COMPUTES ONLY THE SINGULAR VALUES OF a.        
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( s0(:k)/=zero )
                resid(:k,1_i4b) = s0(:k)
            elsewhere
                resid(:k,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:k) - s0(:k))/resid(:k,1_i4b) ) )
!
!           DEALLOCATE WORK ARRAY.
!
            deallocate( s0 )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:,:k) - U(:,:k)*S(:k,:k).
!
        resid(:m,:k) = matmul(a2,v) - a(:,:k)*spread(s,dim=1,ncopies=m)
        a2(:k,1_i4b) = norm( resid(:m,:k), dim=2_i4b )
        err1 =  maxval( a2(:k,1_i4b) )/( anorm*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:k,:k) )
!
        resid(:k,:k) = abs( a2(:k,:k) - matmul( transpose(a(:m,:k)), a(:m,:k) ) )
        err2 = maxval( resid(:k,:k) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
        resid(:k,:k) = abs( a2(:k,:k) - matmul( transpose(v(:n,:k)), v(:n,:k) ) )
        err3 = maxval( resid(:k,:k) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, v, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from svd_cmp5() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from svd_cmp5() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp5
! ===========================
!
end program ex1_svd_cmp5

ex1_svd_cmp6.F90

program ex1_svd_cmp6
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP6 in module SVD_Procedures
!   for computing all, or only the leading, singular values and vectors of a real m-by-n matrix by
!   a bidiagonal factorization, and the bisection and inverse iteration methods for the required singular
!   values and vectors.
!
!   The computations are parallelized if OpenMP is used and the one-sided Rhala-Barlow algorithm
!   is used in the bidiagonalization phase.                                                                        
!
!   Finally, SVD_CMP6 corrects fully the possible loss of orthogonality induced by the one-sided
!   Ralha-Barlow bidiagonal reduction algorithm if the input matrix is nearly singular contrary
!   to SVD_CMP3 and SVD_CMP4 subroutines.
!
!
! LATEST REVISION : 12/03/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp6, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp6'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, tmp, tmp2, ulp, &
                  abs_err, rel_err, anorm, elapsed_time
!
    real(stnd), allocatable, dimension(:)   :: singval0
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid
!
    real(stnd), dimension(:),   pointer :: s
    real(stnd), dimension(:,:), pointer :: v
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type, nsvd
!
    logical(lgl) :: failure, failure_bd, failure_bisect, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL OR PARTIAL SVD OF A REAL MATRIX USING THE ONE-SIDED
!               RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM, A BISECTION
!               ALGORITHM FOR COMPUTING SINGULAR VALUES AND AN INVERSE
!               ITERATION ALGORITHM FOR COMPUTING SINGULAR VECTORS.
!               A FINAL BACK TRANSFORMATION AND REORTOGONALIZATION STEP
!               IS ALSO PERFORMED TO CORRECT FOR THE POSSIBLE LOSS OF
!               ORTHOGONALITY INDUCED BY THE ONE-SIDED RALHA-BARLOW
!               BIDIAGONAL REDUCTION ALGORITHM IF THE INPUT MATRIX
!               IS NEARLY SINGULAR.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 8_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY nsvd, THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
!
    nsvd = 20
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    if ( mat_type>0_i4b ) then
!
        allocate( a(m,n), singval0(k), stat=iok )
!
    else
!
        allocate( a(m,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
            singval0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   NULLIFY THE POINTERS s AND v SO THAT THEIR STATUT CAN BE CHECKED INSIDE
!   svd_cmp6 SUBROUTINE.
!
    nullify( s, v )
!
!   svd_cmp6 COMPUTES A FULL OR PARTIAL SINGULAR VALUE DECOMPOSITION (SVD) OF
!   A REAL m-BY-n MATRIX a. THE FULL SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   BY DEFAULT, svd_cmp6 COMPUTES min(m,n) SINGULAR TRIPLETS (E.G., A THIN SVD).
!   IF YOU WANT A PARTIAL SVD, YOU CAN SPECIFY THE OPTIONAL INTEGER
!   PARAMETER nsvd WITH THE REQUESTED RANK OF THE PARTIAL SVD.
!
    call svd_cmp6( a, s, v, failure, sort=sort, nsvd=nsvd, maxiter=maxiter, &
                   failure_bd=failure_bd, failure_bisect=failure_bisect )
!
!   THE ROUTINE RETURNS THE nsvd LARGEST SINGULAR VALUES AND THE ASSOCIATED nsvd LEFT
!   AND RIGHT SINGULAR VECTORS.
!
!   ON EXIT OF svd_cmp6 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE INVERSE ITERATION ALGORITHM FAILS TO CONVERGE FOR
!                         SOME SINGULAR VECTORS IN maxiter ITERATIONS.
!
!       failure_bd= false :  INDICATES SUCCESSFUL EXIT AND THAT MAXIMUM ACCURACY WAS OBTAINED
!                            IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION OF a .
!       failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR.
!
!       failure_bisect= false :  INDICATES SUCCESSFUL EXIT AND THAT THE BISECTION ALGORITHM CONVERGED
!                                FOR ALL THE COMPUTED SINGULAR VALUES TO THE DESIRED ACCURACY.
!       failure_bisect= true  :  INDICATES THAT SOME OR ALL OF THE SINUGULAR VALUES FAILED TO CONVERGE
!                                OR WERE NOT COMPUTED. THIS IS GENERALLY CAUSED BY INACCURATE ARITHMETIC.
!
!       nsvd IS THE NUMBER OF SINGULAR TRIPLETS WHICH HAVE BEEN
!       COMPUTED BY THE SUBROUTINE, WHICH CAN BE GREATER THAN THE REQUESTED
!       NUMBER IF MULTIPLE SINGULAR VALUES AT INDEX nsvd MAKE UNIQUE SELECTION
!       IMPOSSIBLE.
!
!       a IS OVERWRITTEN WITH THE FIRST nsvd LEFT SINGULAR VECTORS,
!       STORED COLUMNWISE.
!
!       POINTER s CONTAINS THE nsvd LARGEST SINGULAR VALUES OF a .
!   
!       POINTER v CONTAINS THE ASSOCIATED nsvd RIGHT SINGULAR VECTORS
!       STORED COLUMNWISE.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   BY DEFAULT, THE SINGULAR VALUES ARE SORTED INTO INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(m,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( singval0(:nsvd)/=zero )
                resid(:nsvd,1_i4b) = singval0(:nsvd)
            elsewhere
                resid(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsvd) - singval0(:nsvd) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsvd) - singval0(:nsvd))/resid(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:,:nsvd) - U(:,:nsvd)*S(:nsvd,:nsvd).
!
        resid(:m,:nsvd) = matmul(a2,v) - a(:m,:nsvd)*spread(s(:nsvd),dim=1,ncopies=m)
        a2(:nsvd,1_i4b) = norm( resid(:m,:nsvd), dim=2_i4b )
        err1 =  maxval( a2(:nsvd,1_i4b) )/( anorm*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:nsvd,:nsvd) )
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(a(:m,:nsvd)), a(:m,:nsvd) ) )
        err2 = maxval( resid(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(v(:n,:nsvd)), v(:n,:nsvd) ) )
        err3 = maxval( resid(:nsvd,:nsvd) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS AND POINTERS.
!
    if ( mat_type>0_i4b ) then
        deallocate( a, v, s, singval0 )
    else
        deallocate( a, v, s )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE        ( from svd_cmp6() ) = ', failure
    write (prtunit,*) ' FAILURE_BD     ( from svd_cmp6() ) = ', failure_bd
    write (prtunit,*) ' FAILURE_BISECT ( from svd_cmp6() ) = ', failure_bisect
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsvd, ' singular values and vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp6
! ===========================
!
end program ex1_svd_cmp6

ex1_svd_cmp7.F90

program ex1_svd_cmp7
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP7 in module SVD_Procedures
!   for computing all singular values and all, or only the leading, singular vectors of a m-by-n real matrix
!   by a one- or two-stage bidiagonal factorization, the dqds algorithm for computing the singular values
!   and the inverse iteration method for the required singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   bidiagonal reduction and inverse iteration algorithms are used.
!
!   Finally, SVD_CMP7 is one of the fastest deterministic methods available in statpack for computing
!   the Singular Values Decomposition (SVD) of a real matrix.
!
!
! LATEST REVISION : 12/03/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp7, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0,
! nsvd IS THE NUMBER OF LEADING SINGULAR VECTORS, WHICH MUST BE COMPUTED AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000, nsvd=k, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp7'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, tmp, tmp2, ulp, &
                  abs_err, rel_err, anorm, elapsed_time
!
    real(stnd), allocatable, dimension(:)   :: s, singval0
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, u, v
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, failure_dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL OR PARTIAL SVD OF A REAL MATRIX USING A FAST VARIANT
!               OF THE GOLUB-REINSCH BIDIAGONAL REDUCTION ALGORITHM, THE
!               DQDS ALGORITHM FOR COMPUTING SINGULAR VALUES AND AN INVERSE
!               ITERATION ALGORITHM FOR COMPUTING SINGULAR VECTORS.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    if ( mat_type>0_i4b ) then
!
        allocate( a(m,n), s(k), u(m,nsvd), v(n,nsvd), singval0(k), stat=iok )
!
    else
!
        allocate( a(m,n), s(k), u(m,nsvd), v(n,nsvd), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
            singval0(nsvd0+1_i4b:k) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp7 COMPUTES A FULL OR PARTIAL SINGULAR VALUE DECOMPOSITION (SVD) OF
!   A REAL m-BY-n MATRIX a. THE FULL SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   svd_cmp7 COMPUTES ALL min(m,n) SINGULAR TRIPLETS (E.G., A THIN SVD)
!   OR ALL SINGULAR VALUES AND ONLY THE nsvd LEADING LEFT AND RIGHT SINGULAR
!   VECTORS OF THE INPUT MATRIX a , AT THE USER OPTION. THE NUMBER OF
!   LEFT AND RIGHT SINGULAR VECTORS COMPUTED IS DETERMINED BY THE NUMBER
!   OF COLUMNS IN THE INPUT MATRIX ARGUMENTS u AND v, WHICH SHOULD BE THE
!   SAME.
!
    call svd_cmp7( a, s, u, v, failure, sort=sort, maxiter=maxiter, failure_dqds=failure_dqds )
!
!   ON EXIT OF svd_cmp7 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE DQDS ALGORITHM FAILED TO CONVERGE OR THAT
!                         THE INVERSE ITERATION ALGORITHM FAILED TO CONVERGE FOR
!                         SOME SINGULAR VECTORS IN maxiter ITERATIONS.
!
!       failure_dqds= false :  INDICATES SUCCESSFUL EXIT AND THAT THE DQDS ALGORITHM CONVERGED
!                              FOR ALL THE COMPUTED SINGULAR VALUES TO THE DESIRED ACCURACY.
!       failure_dqds= true  :  INDICATES THAT SOME OR ALL OF THE SINUGULAR VALUES FAILED TO CONVERGE
!                              OR WERE NOT COMPUTED. THIS IS GENERALLY CAUSED BY INACCURATE ARITHMETIC.
!                              IN THAT CASE THE SIGN OF THE INCORRECT SINGULAR VALUES IN s IS SET TO
!                              NEGATIVE AND MATRIX ARGUMENTS u AND v ARE FILLED WITH A QUIET NAN IN OUTPUT
!                              OF THE SUBROUTINE.
!
!       a IS OVERWRITTEN AND USED AS WORKSPACE INSIDE THE SUBROUTINE.
!
!       s CONTAINS THE SINGULAR VALUES OF a .
!   
!       u CONTAINS THE nsvd LEADING LEFT SINGULAR VECTORS STORED COLUMNWISE.
!   
!       v CONTAINS THE nsvd LEADING RIGHT SINGULAR VECTORS STORED COLUMNWISE.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   BY DEFAULT, THE SINGULAR VALUES ARE SORTED INTO INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(m,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( singval0(:nsvd)/=zero )
                resid(:nsvd,1_i4b) = singval0(:nsvd)
            elsewhere
                resid(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsvd) - singval0(:nsvd) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsvd) - singval0(:nsvd))/resid(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:,:nsvd) - U(:,:nsvd)*S(:nsvd,:nsvd).
!
        resid(:m,:nsvd) = matmul(a2,v) - u(:m,:nsvd)*spread(s(:nsvd),dim=1,ncopies=m)
        a2(:nsvd,1_i4b) = norm( resid(:m,:nsvd), dim=2_i4b )
        err1 =  maxval( a2(:nsvd,1_i4b) )/( anorm*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:nsvd,:nsvd) )
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(u(:m,:nsvd)), u(:m,:nsvd) ) )
        err2 = maxval( resid(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(v(:n,:nsvd)), v(:n,:nsvd) ) )
        err3 = maxval( resid(:nsvd,:nsvd) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( mat_type>0_i4b ) then
        deallocate( a, u, v, s, singval0 )
    else
        deallocate( a, u, v, s )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE      ( from svd_cmp7() ) = ', failure
    write (prtunit,*) ' FAILURE_DQDS ( from svd_cmp7() ) = ', failure_dqds
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsvd, ' singular vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp7
! ===========================
!
end program ex1_svd_cmp7

ex1_svd_cmp8.F90

program ex1_svd_cmp8
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP8 in module SVD_Procedures
!   for computing all singular values and all, or only the leading, singular vectors of a m-by-n real matrix
!   with m>=n by a one- or two-stage bidiagonal factorization, the dqds algorithm for computing the singular
!   values and the inverse iteration method for the required singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of the
!   bidiagonal reduction and inverse iteration algorithms are used.
!
!   Finally, SVD_CMP8 is one of the fastest deterministic methods available in statpack for computing
!   the Singular Values Decomposition (SVD) of a real matrix. An important difference with SVD_CMP7
!   subroutine is that the fast one-sided Rhala-Barlow algorithm is used in the bidiagonalization phase.      
!   Thus, in many cases SVD_CMP8 can be faster than SVD_CMP7 (especially when the number of available
!   processors is reduced) at the expense of a slight loss of orthogonality for the smaller left
!   singular vectors.
!
!
! LATEST REVISION : 12/03/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,  &
                         svd_cmp8, norm, unit_matrix, random_seed_, random_number_,       &
                         gen_random_mat, singval_sort, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX, WITH m>=n,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO n)
! FOR CASES GREATER THAN 0,
! nsvd IS THE NUMBER OF LEADING SINGULAR VECTORS, WHICH MUST BE COMPUTED AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, nsvd0=3000, nsvd=n, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 and 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of svd_cmp8'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, err3, eps, tmp, tmp2, ulp, &
                  abs_err, rel_err, anorm, elapsed_time
!
    real(stnd), allocatable, dimension(:)   :: s, singval0
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, u, v
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, failure_bd, failure_dqds, reortho, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : FULL OR PARTIAL SVD OF A REAL MATRIX USING THE ONE-SIDED
!               RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM, THE DQDS
!               ALGORITHM FOR COMPUTING SINGULAR VALUES AND AN INVERSE
!               ITERATION ALGORITHM FOR COMPUTING SINGULAR VECTORS.
!               A FINAL REORTOGONALIZATION STEP IS ALSO PERFORMED
!               TO CORRECT FOR THE POSSIBLE LOSS OF ORTHOGONALITY
!               INDUCED BY THE ONE-SIDED RALHA-BARLOW BIDIAGONAL
!               REDUCTION ALGORITHM IF THE INPUT MATRIX IS NEARLY SINGULAR.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 0_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW ALGORITHM.
!
    reortho = false
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    if ( mat_type>0_i4b ) then
!
        allocate( a(m,n), s(n), u(m,nsvd), v(n,nsvd), singval0(n), stat=iok )
!
    else
!
        allocate( a(m,n), s(n), u(m,nsvd), v(n,nsvd), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            singval0(:nsvd0-1_i4b) = one
            singval0(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                singval0(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                singval0(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( singval0(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( singval0 ) ) then
!
            if ( .not.all( ieee_is_normal( singval0(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', singval0(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( singval0(:nsvd0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( singval0(:nsvd0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
        if ( mat_type>0_i4b ) then
!
            singval0(nsvd0+1_i4b:n) = zero
!
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp8 COMPUTES A FULL OR PARTIAL SINGULAR VALUE DECOMPOSITION (SVD) OF
!   A REAL m-BY-n MATRIX a. THE FULL SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   V IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   svd_cmp8 COMPUTES ALL min(m,n) SINGULAR TRIPLETS (E.G., A THIN SVD)
!   OR ALL SINGULAR VALUES AND ONLY THE nsvd LEADING LEFT AND RIGHT SINGULAR
!   VECTORS OF THE INPUT MATRIX a , AT THE USER OPTION. THE NUMBER OF
!   LEFT AND RIGHT SINGULAR VECTORS COMPUTED IS DETERMINED BY THE NUMBER
!   OF COLUMNS IN THE INPUT MATRIX ARGUMENTS u AND v, WHICH SHOULD BE THE
!   SAME.
!
    call svd_cmp8( a, s, u, v, failure, sort=sort, maxiter=maxiter, reortho=reortho,  &
                   failure_bd=failure_bd, failure_dqds=failure_dqds )
!
!   ON EXIT OF svd_cmp8 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE DQDS ALGORITHM FAILED TO CONVERGE OR THAT
!                         THE INVERSE ITERATION ALGORITHM FAILED TO CONVERGE FOR
!                         SOME SINGULAR VECTORS IN maxiter ITERATIONS.
!
!       failure_bd= false :  INDICATES SUCCESSFUL EXIT AND THAT MAXIMUM ACCURACY WAS OBTAINED
!                            IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION OF a .
!       failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR.
!
!       failure_dqds= false :  INDICATES SUCCESSFUL EXIT AND THAT THE DQDS ALGORITHM CONVERGED
!                              FOR ALL THE COMPUTED SINGULAR VALUES TO THE DESIRED ACCURACY.
!       failure_dqds= true  :  INDICATES THAT SOME OR ALL OF THE SINUGULAR VALUES FAILED TO CONVERGE
!                              OR WERE NOT COMPUTED. THIS IS GENERALLY CAUSED BY INACCURATE ARITHMETIC.
!                              IN THAT CASE THE SIGN OF THE INCORRECT SINGULAR VALUES IN s IS SET TO
!                              NEGATIVE AND MATRIX ARGUMENTS u AND v ARE FILLED WITH A QUIET NAN IN OUTPUT
!                              OF THE SUBROUTINE.
!
!       a IS OVERWRITTEN AND USED AS WORKSPACE INSIDE THE SUBROUTINE.
!
!       s CONTAINS THE SINGULAR VALUES OF a .
!   
!       u CONTAINS THE nsvd LEADING LEFT SINGULAR VECTORS STORED COLUMNWISE.
!   
!       v CONTAINS THE nsvd LEADING RIGHT SINGULAR VECTORS STORED COLUMNWISE.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   BY DEFAULT, THE SINGULAR VALUES ARE SORTED INTO INTO DESCENDING ORDER.
!   THE SINGULAR VECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(m,nsvd), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        if ( mat_type>0_i4b ) then
!
!           COMPUTE ERRORS FOR SINGULAR VALUES.
!
            where( singval0(:nsvd)/=zero )
                resid(:nsvd,1_i4b) = singval0(:nsvd)
            elsewhere
                resid(:nsvd,1_i4b) = one
            end where
!
!           ABSOLUTE ERRORS OF SINGULAR VALUES.
!
            abs_err = maxval( abs( s(:nsvd) - singval0(:nsvd) ) )
!
!           RELATIVE ERRORS OF SINGULAR VALUES.
!
            rel_err = maxval( abs( (s(:nsvd) - singval0(:nsvd))/resid(:nsvd,1_i4b) ) )
!
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:,:nsvd) - U(:,:nsvd)*S(:nsvd,:nsvd).
!
        resid(:m,:nsvd) = matmul(a2,v) - u(:m,:nsvd)*spread(s(:nsvd),dim=1,ncopies=m)
        a2(:nsvd,1_i4b) = norm( resid(:m,:nsvd), dim=2_i4b )
        err1 =  maxval( a2(:nsvd,1_i4b) )/( anorm*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:nsvd,:nsvd) )
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(u(:m,:nsvd)), u(:m,:nsvd) ) )
        err2 = maxval( resid(:nsvd,:nsvd) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V**(t)*V.
!
        resid(:nsvd,:nsvd) = abs( a2(:nsvd,:nsvd) - matmul( transpose(v(:n,:nsvd)), v(:n,:nsvd) ) )
        err3 = maxval( resid(:nsvd,:nsvd) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( mat_type>0_i4b ) then
        deallocate( a, u, v, s, singval0 )
    else
        deallocate( a, u, v, s )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test ) then
!
        write (prtunit,*) 
!
        if ( mat_type>0_i4b ) then
!
            write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
            write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
        end if
!
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE      ( from svd_cmp8() ) = ', failure
    write (prtunit,*) ' FAILURE_BD   ( from svd_cmp8() ) = ', failure_bd
    write (prtunit,*) ' FAILURE_DQDS ( from svd_cmp8() ) = ', failure_dqds
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsvd, ' singular vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_svd_cmp8
! ===========================
!
end program ex1_svd_cmp8

ex1_sym_inv.F90

program ex1_sym_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function SYM_INV
!   in module Lin_Procedures for computing the inverse of a symmetric
!   positive-definite matrix. 
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    USE Statpack, ONLY : lgl, i4b, stnd, true, false, zero, one, sym_inv, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE POSITIVE
! DEFINITE MATRIX. m SHOULD BE GREATER OR EQUAL TO n.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=4000
!
    character(len=*), parameter :: name_proc='Example 1 of sym_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, ata, atainv, res
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate
!
    logical(lgl) :: do_test, upper
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : INVERSE OF A REAL SYMMETRIC POSITIVE DEFINITE MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), ata(n,n), atainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
! 
!   GENERATE A RANDOM DATA MATRIX a WITH MORE ROWS THAN COLUMNS.
!
    call random_number( a )
!
!   GENERATE A n-BY-n SYMMETRIC POSITIVE DEFINITE MATRIX FROM a .
!
    ata = matmul( transpose(a), a )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE MATRIX INVERSE OF ata WITH FUNCTION sym_inv.
!
    atainv = sym_inv( ata, upper=upper )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE ata TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( ata, atainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        err = sum( abs(res) ) / sum( abs(ata) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ata, atainv )
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a positive definite symmetric matrix of size ', &
       n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_sym_inv
! ==========================
!
end program ex1_sym_inv

ex1_symlin_filter.F90

program ex1_symlin_filter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions LP_COEF, HP_COEF,
!   SYMLIN_FILTER in module Time_Series_Procedures for band-pass filtering of time series with
!   a Lanczos filter.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lp_coef, hp_coef, symlin_filter
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES AND
! k IS THE NUMBER OF TERMS OF THE LANCZOS FILTER.
!
    integer(i4b), parameter :: prtunit=6, n=2000, k=21
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                     :: err
    real(stnd), dimension(n)       :: y, y2, y3
    real(stnd), dimension(k)       :: coefl, coefh
!
    integer(i4b) :: pc, nfilt, n1, n2, khalf
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of symlin_filter'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BAND-PASS FILTERING OF TIME SERIES WITH A LANCZOS FILTER.
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY FOR LATER USE.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
!   DETERMINE THE CUTOFF PERIOD.
!
    pc  = 18
!
!   FUNCTION lp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- LOW PASS FILTER WITH CUTOFF PERIOD PL (E.G. CUTOFF FREQUENCY FC =  1/PL).
!
    coefl(:k) = lp_coef( pl=pc, k=k )
!
!   FUNCTION hp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- HIGH PASS FILTER WITH CUTOFF PERIOD PH (E.G. CUTOFF FREQUENCY FC =  1/PH).
!
    coefh(:k) = hp_coef( ph=pc, k=k )
!
!   PL AND  PH ARE EXPRESSED IN NUMBER OF POINTS, i.e. PL OR PH =6(18) CORRESPONDS TO PERIODS
!   OF 1.5 YRS FOR QUATERLY(MONTHLY) DATA.
!
!   SUBROUTINE symlin_filter PERFORMS A SYMMETRIC FILTERING OPERATION ON AN INPUT TIME
!   SERIES (E.G. THE ARGUMENT VEC) WITH THE ARRAY OF SYMMETRIC LINEAR FILTER COEFFICIENTS COEF(:).
!
!   NOTE THAT (size(COEF)-1)/2 DATA POINTS WILL BE LOST FROM EACH END OF THE TIME SERIES,
!   SO THAT NFILT= size(VEC) - size(COEF) + 1) TIME OBSERVATIONS ARE RETURNED
!   AND THE REMAINING PART OF VEC(:) IS SET TO ZERO. NFILT IS AN OPTIONAL ARGUMENT
!   GIVING THE LENGTH OF THE FILTERED TIME SERIES ON EXIT.
!
    call symlin_filter( vec=y2(:n), coef=coefl(:k),  nfilt=nfilt )
!
    call symlin_filter( vec=y3(:n), coef=coefh(:k)  )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE TWO FILTERED TIME SERIES.
!
    y2(:nfilt) = y2(:nfilt) + y3(:nfilt)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    khalf = ( k - 1 )/2
    n1    = khalf + 1
    n2    = n - khalf
!
    err = maxval(abs(y(n1:n2)-y2(:nfilt)))/maxval(abs(y(n1:n2)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_symlin_filter
! ================================
!
end program ex1_symlin_filter

ex1_symlin_filter2.F90

program ex1_symlin_filter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines/functions LP_COEF, HP_COEF,
!   SYMLIN_FILTER2 in module Time_Series_Procedures for band-pass filtering of time series with
!   a Lanczos filter.
!
!   In SYMLIN_FILTER2, no data points will be lost at each end of the time series contrary
!   to what happened in SYMLIN_FILTER subroutine. however, observations at each end
!   of the time series are affected by end effects.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lp_coef, hp_coef, symlin_filter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES AND
! k IS THE NUMBER OF TERMS OF THE LANCZOS FILTER.
!
    integer(i4b), parameter :: prtunit=6, n=2000, k=21
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                     :: err
    real(stnd), dimension(n)       :: y, y2, y3
    real(stnd), dimension(k)       :: coefl, coefh
!
    integer(i4b) :: pc
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of symlin_filter2'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : BAND-PASS FILTERING OF TIME SERIES WITH A LANCZOS FILTER.
!
!   GENERATE A RANDOM REAL NUMBER SEQUENCE OF LENGTH n .
!
    call random_number( y(:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:n) = y(:n)
    y3(:n) = y(:n)
!
!   DETERMINE THE CUTOFF PERIOD.
!
    pc  = 18
!
!   FUNCTION lp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- LOW PASS FILTER WITH CUTOFF PERIOD PL (E.G. CUTOFF FREQUENCY FC =  1/PL).
!
    coefl(:k) = lp_coef( pl=pc, k=k )
!
!   FUNCTION hp_coef COMPUTES THE K-TERM LEAST SQUARES APPROXIMATION TO AN
!   -IDEAL- HIGH PASS FILTER WITH CUTOFF PERIOD PH (E.G. CUTOFF FREQUENCY FC =  1/PH).
!
    coefh(:k) = hp_coef( ph=pc, k=k )
!
!   PL AND  PH ARE EXPRESSED IN NUMBER OF POINTS, i.e. PL OR PH =6(18) CORRESPONDS TO PERIODS
!   OF 1.5 YRS FOR QUATERLY(MONTHLY) DATA.
!
!   SUBROUTINE symlin_filter2 PERFORMS A SYMMETRIC FILTERING OPERATION ON AN INPUT TIME
!   SERIES (EG THE ARGUMENT VEC) WITH THE ARRAY OF SYMMETRIC LINEAR FILTER COEFFICIENTS COEF(:).
!
!   NO DATA POINTS WILL BE LOST, HOWEVER (size(COEF)-1)/2 OBSERVATIONS
!   AT EACH END OF THE TIME SERIES ARE AFFECTED BY END EFFECTS.
!
    call symlin_filter2( vec=y2(:n), coef=coefl(:k) )
!
    call symlin_filter2( vec=y3(:n), coef=coefh(:k)  )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE TWO FILTERED TIME SERIES.
!
    y2(:n) = y2(:n) + y3(:n)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    err = maxval(abs(y(:n)-y2(:n)))/maxval(abs(y(:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex1_symlin_filter2
! =================================
!
end program ex1_symlin_filter2

ex1_symtrid_bisect.F90

program ex1_symtrid_bisect
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_BISECT
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real symmetric tridiagonal matrix by a bisection method.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_DEFLATE in module Eig_Procedures
!    for computing selected eigenvectors of a real symmetric tridiagonal matrix.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, c50,     &
                         safmin, trid_deflate, symtrid_bisect, norm, merror,   &
                         allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE TRIDIAGONAL MATRIX AND neig IS THE NUMBER
! OF THE COMPUTED SMALLEST OR LARGEST EIGENVALUES/EIGENVECTORS,
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_bisect'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, abstol, normt, &
                                               elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, eigval, temp, temp2
    real(stnd), allocatable, dimension(:,:) :: resid, eigvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: max_qr_steps, neig2, j
!
    logical(lgl)  :: failure1, failure2, do_test, small, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   DETERMINE IF YOU WANT TO COMPUTE THE neig SMALLEST OR LARGEST EIGENVALUES.
!
    small = false
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   DETERMINE PARAMETERS OF THE DEFLATION ALGORITHM.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED EIGENVECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF EIGENVALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE EIGENVECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A TRIDIAGONAL MATRIX FOR A GIVEN EIGENVALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL REQUESTED
!   EIGENVALUES EXCEEDS max_qr_steps * neig.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), eigval(n), eigvec(n,neig), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A (1-2-1) TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    d(:n) = two
    e(:n) = one
!
!    d(:n) = 0.5
!    e(:n) = 0.5
!
!    call random_number( d(:n) )
!    call random_number( e(:n) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE neig EIGENVALUES OF THE TRIDIAGONAL MATRIX BY A BISECTION METHOD WITH HIGH ACCURACY.
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    call symtrid_bisect( d, e, neig2, eigval, failure1, small=small, sort=sort, le=neig, abstol=abstol )
!
!   ON EXIT OF symtrid_bisect :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BISECTION
!                         ALGORITHM.
!
!   NEXT, COMPUTE THE neig ASSOCIATED EIGENVECTORS OF THE TRIDIAGONAL MATRIX a BY A
!   DEFLATION TECHNIQUE APPLIED ON THE TRIDIAGONAL MATRIX d_e.
!
!   ON ENTRY OF SUBROUTINE trid_deflate, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_deflate( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure=failure2,     &
                       ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF trid_deflate :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT;
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE DEFLATION
!                         ALGORITHM.
!
!       eigvec CONTAINS THE neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX ASSOCIATED WITH THE
!       EIGENVALUES eigval(:neig).
!
!   trid_deflate MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL AND IF THESE EIGENVALUES ARE NOT COMPUTED WITH HIGH ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resid(n,neig), temp(n), temp2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX.
!
        do j=1_i4b, neig
!
            temp(1_i4b:n) = eigvec(1_i4b:n,j)
!
            temp2(1_i4b)         = d(1_i4b)*temp(1_i4b) + e(1_i4b)*temp(2_i4b)
            temp2(2_i4b:n-1_i4b) = e(1_i4b:n-2_i4b)*temp(1_i4b:n-2_i4b)           +   &
                                   d(2_i4b:n-1_i4b)*temp(2_i4b:n-1_i4b)           +   &
                                   e(2_i4b:n-1_i4b)*temp(3_i4b:n)
            temp2(n)             = e(n-1_i4b)*temp(n-1_i4b) + d(n)*temp(n)
!
            resid(1_i4b:n,j) = temp2(1_i4b:n) - eigval(j)*temp(1_i4b:n)
!
        end do
!
        temp(:neig) = norm( resid(1_i4b:n,1_i4b:neig), dim=2_i4b )
        normt = sqrt( sum( d(1_i4b:n)**2 ) + two*(sum( e(1_i4b:n-1_i4b)**2) ) )
!
        err1  = maxval( temp(:neig) )/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        resid(:neig,:neig) = matmul( transpose( eigvec(:n,:neig) ), eigvec(:n,:neig) )
!
        do j=1_i4b, neig
            resid(j,j) = resid(j,j) - one
        end do
!
        err2 = maxval( abs( resid(:neig,:neig) ) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, temp, temp2, d, e, eigval, eigvec )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( d, e, eigval, eigvec )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from trid_deflate()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_bisect
! =================================
!
end program ex1_symtrid_bisect

ex1_symtrid_cmp.F90

program ex1_symtrid_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines SYMTRID_CMP and
!   ORTHO_GEN_SYMTRID in module EIG_Procedures for computing a tridiagonal reduction
!   of a real symmetric matrix.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, symtrid_cmp,         &
                         ortho_gen_symtrid, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, trid
!
    integer(i4b) :: l
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : TRIDIAGONAL REDUCTION OF A REAL SYMMETRIC MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX AND
!   FROM IT A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), trid(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE SYMMETRIC MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL symtrid_cmp AND ortho_gen_symtrid_cmp TO REDUCE THE MATRIX a TO TRIDIAGONAL FORM
!
!                      a = Q*TRID*Q**(t)
!
!   WHERE Q IS ORTHOGONAL AND TRID IS A SYMMETRIC TRIDIAGONAL MATRIX.
!
!   ON ENTRY OF symtrid_cmp, a MUST CONTAINS THE LEADING n-BY-n UPPER TRIANGULAR PART
!   OF THE MATRIX TO BE REDUCED AND THE STRICTLY LOWER PART OF a IS NOT REFERENCED.
!
    call symtrid_cmp( a, d, e, store_q=true )
!
!   ON EXIT OF symtrid_cmp:
!
!         ARGUMENTS d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND
!         OFF-DIAGONAL ELEMENTS OF THE TRIDIAGONAL MATRIX TRID.
!
!         IF THE OPTIONAL ARGUMENT store_q IS PRESENT AND SET TO TRUE,
!         THE LEADING n-BY-n UPPER TRIANGULAR PART OF a IS OVERWRITTEN
!         BY THE MATRIX Q AS A PRODUCT OF ELEMENTARY REFLECTORS.
!
!   ortho_gen_symtrid GENERATES THE MATRIX Q STORED AS A PRODUCT OF
!   ELEMENTARY REFLECTORS AFTER A CALL TO symtrid_cmp WITH store_q=true.
!
    call ortho_gen_symtrid( a )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*Q - Q*TRID
!
        trid(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
            trid(l,l)       = d(l)
            trid(l,l+1_i4b) = e(l)
            trid(l+1_i4b,l) = e(l)
        end do
!
        trid(n,n) = d(n)
!
        resid(:n,:n) = matmul( a2(:n,:n), a(:n,:n)  )           &
                       - matmul( a(:n,:n), trid(:n,:n) )
!
        trid(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( trid(:n,1_i4b) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n )), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, trid, resid )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed tridiagonal decomposition a = Q*TRD*Q**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed Q orthogonal matrix                   = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the tridiagonal reduction of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_cmp
! ==============================
!
end program ex1_symtrid_cmp

ex1_symtrid_cmp2.F90

program ex1_symtrid_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines SYMTRID_CMP2 and
!   ORTHO_GEN_SYMTRID in module EIG_Procedures for computing a tridiagonal reduction
!   of a real cross-product matrix using the Rhala one-sided method.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, symtrid_cmp2,         &
                         ortho_gen_symtrid, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE MATRIX USED
! TO COMPUTE THE MATRIX CROSS-PRODUCT, m MUST BE GREATER THAN n, OTHERWISE
! symtrid_cmp2 WILL STOP WITH AN ERROR MESSAGE.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e
    real(stnd), allocatable, dimension(:,:) :: a, at, ata, resid, trid
!
    integer(i4b) :: l
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, upper
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : TRIDIAGONAL REDUCTION OF A REAL SYMMETRIC MATRIX CROSS-PRODUCT,
!               USING THE ONE-SIDED RALHA METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( ata(n,n), at(n,m), trid(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        at(:n,:m) = transpose( a(:m,:n) )
!
!       COMPUTE THE SYMMETRIC MATRIX CROSS-PRODUCT.
!
        ata(:n,:n) = matmul( at(:n,:m), a(:m,:n) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL symtrid_cmp2 AND ortho_gen_symtrid_cmp TO REDUCE THE MATRIX CROSS-PRODUCT TO TRIDIAGONAL FORM
!
!                      ata = Q*TRID*Q**(t)
!
!   WHERE Q IS ORTHOGONAL AND TRID IS A SYMMETRIC TRIDIAGONAL MATRIX.
!
!   ON ENTRY OF symtrid_cmp2, a MUST CONTAINS THE INITIAL m-by-n MATRIX USED
!   FOR COMPUTING THE MATRIX CROSS-PRODUCT. THE ORTHOGONAL MATRIX Q IS STORED
!   IN FACTORED FORM IF THE LOGICAL ARGUMENT store_q IS SET TO true.
!
    call symtrid_cmp2( a(:m,:n), d, e, store_q=true )
!
!   ON EXIT OF symtrid_cmp2:
!
!         ARGUMENTS d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND
!         OFF-DIAGONAL ELEMENTS OF THE TRIDIAGONAL MATRIX TRID.
!
!         IF THE LOGICAL ARGUMENT store_q IS SET TO TRUE ON ENTRY,
!         THE LEADING n-BY-n LOWER TRIANGULAR PART OF a IS OVERWRITTEN
!         BY THE MATRIX Q AS A PRODUCT OF ELEMENTARY REFLECTORS.
!
!   ortho_gen_symtrid GENERATES THE MATRIX Q STORED AS A PRODUCT OF
!   ELEMENTARY REFLECTORS AFTER A CALL TO symtrid_cmp2 WITH store_q=true.
!
    call ortho_gen_symtrid( a(:n,:n), false )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*Q - Q*TRID
!
        trid(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
            trid(l,l)       = d(l)
            trid(l,l+1_i4b) = e(l)
            trid(l+1_i4b,l) = e(l)
        end do
!
        trid(n,n) = d(n)
!
        resid(:n,:n) = matmul( ata(:n,:n), a(:n,:n)  )           &
                       - matmul( a(:n,:n), trid(:n,:n) )
!
        trid(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( trid(:n,1_i4b) )/( norm( ata )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( ata(:n,:n) )
!
        at(:n,:n) = transpose( a(:n,:n) )
!
        resid(:n,:n) = abs( ata(:n,:n) - matmul( at(:n,:n), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( ata, trid, resid, at )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed tridiagonal decomposition a**(t)*a = Q*TRD*Q**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed orthogonal matrix Q                          = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the tridiagonal reduction of a ', &
       n, ' by ', n,' real symmetric matrix cross-product is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_cmp2
! ===============================
!
end program ex1_symtrid_cmp2

ex1_symtrid_qri.F90

program ex1_symtrid_qri
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_QRI
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric tridiagonal matrix.
!
!   The computations are parallelized if OpenMP is used and an efficient
!   variant of the tridiagonal QR algorithm is used.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50,            &
                         allocate_error, merror, symtrid_qri, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED TRIDIAGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_qri'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, normt, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, d2, e2, temp, temp2, resid2
    real(stnd), allocatable, dimension(:,:) :: eigvec, resid, id
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: j
!
    logical(lgl) :: failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               TRIDIAGONAL MATRIX USING THE QR METHOD WITH IMPLICIT SHIFT.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    call random_number( d(:n) )
    call random_number( e(:n) )

!    d(:n) = 0.5_stnd
!    e(:n) = 0.5_stnd

!    d(:n) = 1._stnd
!    e(:n) = 2._stnd
!
    if ( do_test ) then
!
        allocate( d2(n), e2(n), resid(n,n), resid2(n), id(n,n),  &
                  temp(n), temp2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE TRIDIAGONAL MATRIX.
!
        d2(:n) = d(:n)
        e2(:n) = e(:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID
!   WITH SUBROUTINE symtrid_qri .
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC TRIDIAGONAL MATRIX TRID
!   IS WRITTEN
!
!                       TRID = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF TRID; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE ASSOCIATED EIGENVECTORS OF TRID.
!
!   ON ENTRY OF symtrid_qri d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND OFF-DIAGONAL
!   OF THE SYMMETRIC TRIDIAGONAL MATRIX TRID. IF THE OPTIONAL ARGUMENT init_mat IS SET TO
!   TRUE, THE MATRIX ARGUMENT eigvec IS INITIALIZED TO THE IDENTITY MATRIX AND THE SUBROUTINE
!   WILL COMPUTE THE EIGENVECTORS OF TRID. IF init_mat IS NOT USED OR SET TO FALSE, THE QR ITERATIONS
!   ARE DIRECTLY APPLIED TO eigvec WITHOUT ANY INITIAIZATION. THIS IS USEFUL FOR COMPUTING THE
!   EIGENVECTORS OF A FULL SYMMETRIC MATRIX.
!
    call symtrid_qri( d(:n), e(:n), failure, eigvec(:n,:n), sort=sort, init_mat=true )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF TRID.
!
!   ON EXIT OF symtrid_qri :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF THE TRIDIAGONAL MATRIX TRID .
!
!       eigvec IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF TRID.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION TRID*U - U*D
!       WHERE D ARE THE EIGENVALUES AND U THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX.
!
        do j=1_i4b, n
!
            temp(1_i4b:n) = eigvec(1_i4b:n,j)
!
            temp2(1_i4b)         = d2(1_i4b)*temp(1_i4b) + e2(1_i4b)*temp(2_i4b)
            temp2(2_i4b:n-1_i4b) = e2(1_i4b:n-2_i4b)*temp(1_i4b:n-2_i4b)           +   &
                                   d2(2_i4b:n-1_i4b)*temp(2_i4b:n-1_i4b)           +   &
                                   e2(2_i4b:n-1_i4b)*temp(3_i4b:n)
            temp2(n)             = e2(n-1_i4b)*temp(n-1_i4b) + d2(n)*temp(n)
!
            resid(1_i4b:n,j) = temp2(1_i4b:n) - d(j)*temp(1_i4b:n)
!
        end do
!
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        normt      = sqrt( sum( d2(1_i4b:n)**2 ) + sum( e2(1_i4b:n-1_i4b)**2 ) )
!
        err1 =  maxval( resid2(:n) )/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U
!       WHERE U ARE THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID.
!
        call unit_matrix( id(:n,:n) )
!
        resid(:n,:n) = abs( id(:n,:n) - matmul( transpose(eigvec(:n,:n)), eigvec(:n,:n) ) )
!
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, resid2, id, temp, temp2, d2, e2, eigvec, d, e )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( eigvec, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_qri
! ==============================
!
end program ex1_symtrid_qri

ex1_symtrid_qri2.F90

program ex1_symtrid_qri2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_QRI2
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric tridiagonal matrix.
!
!   The computations are parallelized if OpenMP is used and a highly efficient
!   variant of the tridiagonal QR algorithm is used. A perfect shift strategy
!   and a wave-front algorithm for applying Givens rotations to eigenvectors are used
!   in the tridiagonal QR algorithm. With these changes, SYMTRID_QRI2 is usually much
!   faster than subroutine SYMTRID_QRI for computing an EVD of a real symmetric
!   tridiagonal matrix for large matrices.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50,            &
                         allocate_error, merror, symtrid_qri2, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED TRIDIAGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_qri2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, normt, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, d2, e2, temp, temp2, resid2
    real(stnd), allocatable, dimension(:,:) :: eigvec, resid, id
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: j
!
    logical(lgl) :: failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               TRIDIAGONAL MATRIX USING THE QR METHOD WITH A
!               PERFECT SHIFT STRATEGY AND A WAVE FRONT ALGORITHM
!               FOR APPLYING GIVENS ROTATIONS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    call random_number( d(:n) )
    call random_number( e(:n) )

!    d(:n) = 0.5_stnd
!    e(:n) = 0.5_stnd

!    d(:n) = 1._stnd
!    e(:n) = 2._stnd
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( d2(n), e2(n), resid(n,n), resid2(n), id(n,n),  &
                  temp(n), temp2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE TRIDIAGONAL MATRIX .
!
        d2(:n) = d(:n)
        e2(:n) = e(:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID
!   WITH SUBROUTINE symtrid_qri2.
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC TRIDIAGONAL MATRIX TRID
!   IS WRITTEN
!
!                       TRID = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF TRID; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF TRID.
!
!   ON ENTRY OF symtrid_qri2 d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND OFF-DIAGONAL
!   OF THE SYMMETRIC TRIDIAGONAL MATRIX TRID. IF THE OPTIONAL ARGUMENT init_mat IS SET TO
!   TRUE, THE MATRIX ARGUMENT eigvec IS INITIALIZED TO THE IDENTITY MATRIX AND THE SUBROUTINE
!   WILL COMPUTE THE EIGENVECTORS OF TRID. IF init_mat IS NOT USED OR SET TO FALSE, THE QR ITERATIONS
!   ARE DIRECTLY APPLIED TO eigvec WITHOUT ANY INITIAIZATION. THIS IS USEFUL FOR COMPUTING THE
!   EIGENVECTORS OF A FULL SYMMETRIC MATRIX.
!
    call symtrid_qri2( d(:n), e(:n), failure, eigvec(:n,:n), sort=sort, init_mat=true )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF TRID.
!
!   ON EXIT OF symtrid_qri2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF THE TRIDIAGONAL MATRIX TRID .
!
!       eigvec IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF TRID.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION TRID*U - U*D
!       WHERE D ARE THE EIGENVALUES AND U THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX.
!
        do j=1_i4b, n
!
            temp(1_i4b:n) = eigvec(1_i4b:n,j)
!
            temp2(1_i4b)         = d2(1_i4b)*temp(1_i4b) + e2(1_i4b)*temp(2_i4b)
            temp2(2_i4b:n-1_i4b) = e2(1_i4b:n-2_i4b)*temp(1_i4b:n-2_i4b)           +   &
                                   d2(2_i4b:n-1_i4b)*temp(2_i4b:n-1_i4b)           +   &
                                   e2(2_i4b:n-1_i4b)*temp(3_i4b:n)
            temp2(n)             = e2(n-1_i4b)*temp(n-1_i4b) + d2(n)*temp(n)
!
            resid(1_i4b:n,j) = temp2(1_i4b:n) - d(j)*temp(1_i4b:n)
!
        end do
!
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        normt      = sqrt( sum( d2(1_i4b:n)**2 ) + sum( e2(1_i4b:n-1_i4b)**2 ) )
!
        err1 =  maxval( resid2(:n) )/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U
!       WHERE U ARE THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID.
!
        call unit_matrix( id(:n,:n) )
!
        resid(:n,:n) = abs( id(:n,:n) - matmul( transpose(eigvec(:n,:n)), eigvec(:n,:n) ) )
!
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, resid2, id, temp, temp2, d2, e2, eigvec, d, e )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( eigvec, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_qri2
! ===============================
!
end program ex1_symtrid_qri2

ex1_symtrid_qri3.F90

program ex1_symtrid_qri3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_QRI3
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric tridiagonal matrix.
!
!   The computations are parallelized if OpenMP is used and a highly efficient
!   variant of the tridiagonal QR algorithm is used. A  wave-front algorithm
!   for applying Givens rotations to eigenvectors is used in the tridiagonal
!   QR algorithm. With these changes, SYMTRID_QRI3 is usually much
!   faster than subroutine SYMTRID_QRI for computing an EVD of a real symmetric
!   tridiagonal matrix for large matrices.
!
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50,            &
                         allocate_error, merror, symtrid_qri3, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED TRIDIAGONAL MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_qri3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, normt, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, d2, e2, temp, temp2, resid2
    real(stnd), allocatable, dimension(:,:) :: eigvec, resid, id
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: j
!
    logical(lgl) :: failure, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               TRIDIAGONAL MATRIX USING THE QR METHOD WITH A
!               WAVE FRONT ALGORITHM FOR APPLYING GIVENS ROTATIONS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    call random_number( d(:n) )
    call random_number( e(:n) )

!    d(:n) = 0.5_stnd
!    e(:n) = 0.5_stnd

!    d(:n) = 1._stnd
!    e(:n) = 2._stnd
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( d2(n), e2(n), resid(n,n), resid2(n), id(n,n),  &
                  temp(n), temp2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE TRIDIAGONAL MATRIX .
!
        d2(:n) = d(:n)
        e2(:n) = e(:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID
!   WITH SUBROUTINE symtrid_qri3.
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC TRIDIAGONAL MATRIX TRID
!   IS WRITTEN
!
!                       TRID = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF TRID; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF TRID.
!
!   ON ENTRY OF symtrid_qri3 d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND OFF-DIAGONAL
!   OF THE SYMMETRIC TRIDIAGONAL MATRIX TRID. IF THE OPTIONAL ARGUMENT init_mat IS SET TO
!   TRUE, THE MATRIX ARGUMENT eigvec IS INITIALIZED TO THE IDENTITY MATRIX AND THE SUBROUTINE
!   WILL COMPUTE THE EIGENVECTORS OF TRID. IF init_mat IS NOT USED OR SET TO FALSE, THE QR ITERATIONS
!   ARE DIRECTLY APPLIED TO eigvec WITHOUT ANY INITIAIZATION. THIS IS USEFUL FOR COMPUTING THE
!   EIGENVECTORS OF A FULL SYMMETRIC MATRIX.
!
    call symtrid_qri3( d(:n), e(:n), failure, eigvec(:n,:n), sort=sort, init_mat=true )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF TRID.
!
!   ON EXIT OF symtrid_qri3 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF THE TRIDIAGONAL MATRIX TRID .
!
!       eigvec IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF TRID.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION TRID*U - U*D
!       WHERE D ARE THE EIGENVALUES AND U THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX.
!
        do j=1_i4b, n
!
            temp(1_i4b:n) = eigvec(1_i4b:n,j)
!
            temp2(1_i4b)         = d2(1_i4b)*temp(1_i4b) + e2(1_i4b)*temp(2_i4b)
            temp2(2_i4b:n-1_i4b) = e2(1_i4b:n-2_i4b)*temp(1_i4b:n-2_i4b)           +   &
                                   d2(2_i4b:n-1_i4b)*temp(2_i4b:n-1_i4b)           +   &
                                   e2(2_i4b:n-1_i4b)*temp(3_i4b:n)
            temp2(n)             = e2(n-1_i4b)*temp(n-1_i4b) + d2(n)*temp(n)
!
            resid(1_i4b:n,j) = temp2(1_i4b:n) - d(j)*temp(1_i4b:n)
!
        end do
!
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        normt      = sqrt( sum( d2(1_i4b:n)**2 ) + sum( e2(1_i4b:n-1_i4b)**2 ) )
!
        err1 =  maxval( resid2(:n) )/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U
!       WHERE U ARE THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX TRID.
!
        call unit_matrix( id(:n,:n) )
!
        resid(:n,:n) = abs( id(:n,:n) - matmul( transpose(eigvec(:n,:n)), eigvec(:n,:n) ) )
!
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, resid2, id, temp, temp2, d2, e2, eigvec, d, e )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( eigvec, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri3() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_qri3
! ===============================
!
end program ex1_symtrid_qri3

ex1_symtrid_ratqri.F90

program ex1_symtrid_ratqri
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_RATQRI
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real n-ny-n symmetric tridiagonal matrix.
!
!   A rational QR method is used for computing the eigenvalues of the symmetric
!   tridiagonal matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module Eig_Procedures
!    for computing selected eigenvectors of a real symmetric tridiagonal matrix.
!
!
! LATEST REVISION : 27/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack,  only : lgl, i4b, stnd, true, false, zero, one, two, c50,     &
                          allocate_error, merror, trid_inviter, symtrid_ratqri, &
                          norm, unit_matrix 
#ifdef _MATMUL
    use Statpack,  only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Utilities, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE TRIDIAGONAL MATRIX AND neig IS THE NUMBER
! OF THE COMPUTED SMALLEST OR LARGEST EIGENVALUES/EIGENVECTORS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE neig ASSOCIATED EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=4000, neig=200, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_ratqri'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, e2, eigval
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, eigvec
!
    integer(i4b) :: l
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE neig LARGEST OR SMALLEST EIGENVALUES OF
!               A SYMMETRIC TRIDIAGONAL MATRIX USING A RATIONAL QR METHOD
!               AND SELECTED EIGENVECTORS BY INVERSE ITERATION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( eps )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE neig SMALLEST OR LARGEST EIGENVALUES.
!
    small = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,neig), d(n), e(n), e2(n), eigval(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A (1-2-1) TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    d(:n) = two
    e(:n) = one
!
!    call random_number( d(:n) )
!    call random_number( e(:n) )
!
!   SAVE THE TRIDIAGONAL FORM FOR LATER USE.
!
    eigval(:n) = d(:n)
    e2(:n)     = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE neig LARGEST EIGENVALUES OF THE SYMMETRIC TRIDIAGONAL MATRIX.
!
    call symtrid_ratqri( eigval(:n), e2(:n), neig, failure, small=small )
!
!   ON EXIT OF symtrid_ratqri :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RATIONAL
!                         QR ALGORITHM.
!
!   THE COMPUTED EIGENVALUES ARE STORED IN eigval(:neig)
!   AND THE SYMMETRIC TRIDIAGONAL MATRIX IS OVERWRITTEN.
!
!   NEXT, COMPUTE THE neig ASSOCIATED EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY maxiter INVERSE ITERATIONS.
!
    call trid_inviter( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure2, maxiter=maxiter )
!
!   ON EXIT OF trid_inviter :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE neig EIGENVECTORS ASSOCIATED WITH THE EIGENVALUES eigval(:neig).
!
!       trid_inviter MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!       IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), a2(neig,neig), resid(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       FORM THE TRIDIAGONAL MATRIX.
!
        a(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
!
            a(l,l)       = d(l)
            a(l+1_i4b,l) = e(l)
            a(l,l+1_i4b) = e(l)
!
        end do
!
        a(n,n) = d(n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        resid = matmul( a, eigvec ) - eigvec*spread( eigval(:neig), 1, n )
!
        err1 = norm(resid)/( norm(a)*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        call unit_matrix( a2 )
!
        resid(:neig,:neig) = a2 - matmul( transpose( eigvec ), eigvec )
!
        err2 = norm(resid(:neig,:neig))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       ALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( eigvec, d, e, e2, eigval )
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_ratqri ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_ratqri
! =================================
!
end program ex1_symtrid_ratqri

ex1_symtrid_ratqri2.F90

program ex1_symtrid_ratqri2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_RATQRI2
!   in module Eig_Procedures for computing the largest or smallest eigenvalues of a
!   real symmetric matrix whose sum of absolute values exceeds a given (positive) threshold.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing selected eigenvectors of a real symmetric matrix.
!
!
! LATEST REVISION : 27/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, ten, c50,  &
                         allocate_error, merror, trid_inviter, symtrid_ratqri2,  &
                         norm, unit_matrix 
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Utilities, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX TRIDIAGONAL MATRIX AND
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE SELECTED EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of symtrid_ratqri2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: val, err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, e2, eigval
    real(stnd), allocatable, dimension(:,:) :: eigvec, a, a2, resid
!
    integer(i4b) :: l, neig
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE LARGEST OR SMALLEST EIGENVALUES OF A n-BY-n REAL
!               SYMMETRIC TRIDIAGONAL MATRIX WHOSE SUM OF ABSOLUTE VALUES EXCEEDS
!               A PRESCRIBED THRESHOLD USING A RATIONAL QR ALGORITHM
!               AND SELECTED EIGENVECTORS BY INVERSE ITERATION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( eps )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE RATIONAL QR ALGORITHM.
!
!   DETERMINE THE THRESHOLD FOR COMPUTING THE SMALLEST OR LARGEST EIGENVALUES.
!
    val = ten
!
!   DETERMINE IF YOU WANT TO COMPUTE THE SMALLEST OR LARGEST EIGENVALUES.
!
    small = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), e2(n), eigval(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A (1-2-1) TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    d(:n) = two
    e(:n) = one
!
!   SAVE THE TRIDIAGONAL FORM FOR LATER USE.
!
    eigval(:n) = d(:n)
    e2(:n)     = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE LARGEST OR SMALLEST EIGENVALUES OF A SYMMETRIC TRIDIAGONAL MATRIX,
!   WHOSE SUM OF ABSOLUTE VALUES EXCEEDS abs( val ).
!
    call symtrid_ratqri2( eigval(:n), e2(:n), val, failure, neig, small=small )
!
!   ON EXIT OF symtrid_ratqri2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE RATIONAL
!                         QR ALGORITHM.
!
!   THE COMPUTED EIGENVALUES ARE STORED IN eigval(:neig)
!   AND THE SYMMETRIC TRIDIAGONAL MATRIX IS OVERWRITTEN.
!
    if ( .not.failure .and. neig>0_i4b ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY maxiter INVERSE ITERATIONS.
!
        call trid_inviter( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure2, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0_i4b ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), a2(neig,neig), resid(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       FORM THE TRIDIAGONAL MATRIX.
!
        a(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
!
            a(l,l)       = d(l)
            a(l+1_i4b,l) = e(l)
            a(l,l+1_i4b) = e(l)
!
        end do
!
        a(n,n) = d(n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        resid = matmul( a, eigvec ) - eigvec*spread( eigval(:neig), 1, n )
!
        err1 = norm(resid)/( norm(a)*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        call unit_matrix( a2 )
!
        resid(:neig,:neig) = a2 - matmul( transpose( eigvec ), eigvec )
!
        err2 = norm(resid(:neig,:neig))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       ALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( d, e, e2, eigval )
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_ratqri2 ) = ', failure
!
    if ( .not. failure  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_symtrid_ratqri2
! ==================================
!
end program ex1_symtrid_ratqri2

ex1_time_to_string.F90

program ex1_time_to_string
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of functions CPUSECS and TIME_TO_STRING
!   in module Time_Procedures for transforming time or a time interval into a string.
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, extd, cpusecs, time_to_string
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(extd)   :: tim1, tim2
!
    integer(i4b)  :: i, j
!
    character(len=13) :: string
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of time_to_string'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   FUNCTION cpusecs OBTAINS, FROM THE INTRINSIC ROUTINE SYSTEM_CLOCK,
!   THE CURRENT VALUE OF THE SYSTEM CPU USAGE CLOCK. THIS VALUE
!   IS THEN CONVERTED TO SECONDS AND RETURNED AS AN EXTENDED PRECISION
!   REAL VALUE.
!
!   THIS FUNCTIONS ASSUMES THAT THE NUMBER OF CPU CYCLES (CLOCK COUNTS) BETWEEN
!   TWO CALLS IS LESS THAN COUNT_MAX, THE MAXIMUM POSSIBLE VALUE OF CLOCK COUNTS
!   AS RETURNED BY THE INTRINSIC ROUTINE SYSTEM_CLOCK.
!
!   THIS ROUTINE WILL NOT WORK PROPERLY WITH OPENMP.
!
!   A TYPICAL USE OF THIS FUNCTION IS AS FOLLOWS :
!
    tim1 = cpusecs()
    j = 0
    do i=1, 1000000000
        j = j + 1
    end do
    tim2 = cpusecs()
!
!   CONVERT THE CPU TIME tim2-tim1 TO A STRING FORMAT FOR PRINTING AS
!
!           'hours.minutes.seconds.milliseconds'
!
!   WITH SUBROUTINE time_to_string .
!
    string = time_to_string( tim2-tim1 )
!
!   PRINT THE RESULT.
!
    write (prtunit, *)  " CPU Time(s): " // string //  " => hours.minutes.seconds.milliseconds "
!
!
! END OF PROGRAM ex1_time_to_string
! =================================
!
end program ex1_time_to_string

ex1_transpose2.F90

program ex1_transpose2
!
!
! Purpose
! =======
!
!   This program illustrates the use of function TRANSPOSE2 in module Module_Utilities
!   and compares its efficiency with the intrinsic TRANSPOSE function for transposing
!   a real matrix.
!
!   If OpenMP is used, TRANSPOSE2 function must be much faster than the intrinsic
!   TRANSPOSE function for many compilers.
!
!
! LATEST REVISION : 28/11/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, transpose2, merror, allocate_error
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=10000, m=10000
!
    character(len=*), parameter :: name_proc='Example 1 of transpose2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: elapsed_time1, elapsed_time2
    real(stnd), dimension(:,:), allocatable :: a, a2, at
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure
    logical(lgl), dimension(:,:), allocatable :: test
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : TRANSPOSITION OF A REAL MATRIX.
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
    failure = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), at(m,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-m RANDOM REAL MATRIX a .
!
    call random_number( a(:n,:m) )
!
!   TRANSPOSE THE MATRIX WITH transpose2 FUNCTION.
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
    at(:m,:n) = transpose2( a(:n,:m) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time1 = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS .
!
        allocate( a2(n,m), test(n,m), stat=iok )
!
        if ( iok/=0 ) then
           call merror( name_proc//allocate_error )
        end if
!
        a2(:n,:m) = transpose( at(:m,:n) )
!
!       CHECK THE RESULTS.
!
        test(:n,:m) = a(:n,:m) /= a2(:n,:m)
!
        failure = any( test(:n,:m) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, test )
!
    end if
!
!   NOW TRANSPOSE THE MATRIX WITH INTRINSIC transpose FUNCTION.
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
    at(:m,:n) = transpose( a(:n,:m) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time2 = real( itime, stnd )/real( irate, stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, at )
!
!   CHECK AND PRINT THE RESULTS.
!
    if ( .not. failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for transposing a real matrix of size ', n, ' by ', m,  &
       ' with transpose2() function is ', elapsed_time1, ' seconds'
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for transposing a real matrix of size ', n, ' by ', m,  &
       ' with the intrinsic transpose() function is ', elapsed_time2, ' seconds'
!
!
! END OF PROGRAM ex1_transpose2
! =============================
!
end program ex1_transpose2

ex1_trid_deflate.F90

program ex1_trid_deflate
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_DEFLATE
!   in module Eig_Procedures for computing all or selected eigenvectors of a
!   real symmetric matrix by a deflation method.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines SYMTRID_CMP and SYMTRID_BISECT
!    in module EIG_Procedures.
!
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,          &
                         safmin, symtrid_cmp, trid_deflate, symtrid_bisect, norm, unit_matrix,    &
                         random_seed_, random_number_, gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n) FOR CASES GREATER THAN 0,
! nvec IS THE NUMBER OF EIGENVALUES AND EIGENVECTORS, WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000, nvec=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of trid_deflate'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, safmin, abstol, tmp, tmp2, &
                                               ulp, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: eigval, resid2, d, e
    real(stnd), allocatable, dimension(:,:) :: a, eigvec, a2
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: i, mat_type, max_qr_steps, neig
!
    logical(lgl)  :: failure, failure2, do_test, ortho
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SELECTED EIGENVALUES AND, OPTIONALLY, ASSOCIATED EIGENVECTORS OF
!               A REAL SYMMETRIC MATRIX USING BISECTION FOR EIGENVALUES
!               AND A DEFLATION METHOD FOR THE EIGENVECTORS.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   DETERMINE PARAMETERS OF THE DEFLATION ALGORITHM.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED EIGENVECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF EIGENVALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE EIGENVECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A TRIDIAGONAL MATRIX FOR A GIVEN EIGENVALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL REQUESTED
!   EIGENVALUES EXCEEDS max_qr_steps * nvec.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,nvec), eigval(n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                d(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            d(:neig0-1_i4b) = one
            d(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                d(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                d(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( d(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( d ) ) then
!
            if ( .not.all( ieee_is_normal( d(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( d(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( d(:neig0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), resid2(nvec), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST REDUCE THE SYMMETRIC MATRIX TO SYMMETRIC TRIDIAGONAL FORM BY ORTHOGONAL
!   TRANSFORMATIONS WITH SUBROUTINE symtrid_cmp. THE ORTHOGONAL TRANSFORMATIONS
!   ARE SAVED IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE.
!
    call symtrid_cmp( a(:n,:n), d(:n), e(:n), store_q=true )
!
!   ON EXIT OF symtrid_cmp:
!
!   a IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS USED TO REDUCE a
!   TO TRIDIAGONAL FORM IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE,
!   OTHERWISE a IS DESTROYED.
!   ARGUMENTS d and e CONTAIN, RESPECTIVELY THE DIAGONAL AND OFF-DIAGONAL
!   ELEMENTS OF THE TRIDIAGONAL MATRIX.
!
!   SECOND, COMPUTE THE nvec LARGEST  EIGENVALUES OF THE SELF-ADJOINT MATRIX a TO HIGH
!   ACCURACY WITH SUBROUTINE symtrid_bisect.
!
    call symtrid_bisect( d(:n), e(:n), neig, eigval(:n), failure, &
                         sort=sort, abstol=abstol, le=nvec )
!
!   ON EXIT OF symtrid_bisect:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   NEXT, COMPUTE THE FIRST nvec EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY A
!   DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE TRIDIAGONAL MATRIX (STORED
!   IN VECTORS d AND e) AND BACK-TRANSFORMATION.
!
!   ON ENTRY OF SUBROUTINE trid_deflate, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_deflate( d(:n), e(:n), eigval(:nvec), eigvec(:n,:nvec), failure=failure2,     &
                       mat=a, ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF trid_deflate :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM.
!
!       eigvec CONTAINS THE nvec EIGENVECTORS OF a ASSOCIATED WITH THE EIGENVALUES eigval(:nvec).
!
!   trid_deflate MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL AND IF THESE EIGENVALUES ARE NOT COMPUTED WITH HIGH ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        a(:n,:nvec) = matmul( a2, eigvec ) - eigvec*spread( eigval(:nvec), 1, n )
        resid2(:nvec) = norm( a(:n,:nvec), dim=2_i4b )
!
        err1 =  maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:nvec,:nvec) )
!
        a2(:nvec,:nvec) = abs( a(:nvec,:nvec) - matmul( transpose( eigvec ), eigvec ) )
!
        err2 = maxval(a2(:nvec,:nvec))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, eigvec, eigval, d, e, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, eigvec, eigval, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_deflate()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', nvec, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_trid_deflate
! ===============================
!
end program ex1_trid_deflate

ex1_trid_inviter.F90

program ex1_trid_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_INVITER
!   in module Eig_Procedures for computing all or selected eigenvectors of a
!   real symmetric matrix by inverse iterations.                                                                  
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine EIGVAL_CMP in module EIG_Procedures.
!                                                                              
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6, &
                         eigval_cmp, trid_inviter, norm, unit_matrix, random_seed_,      &
                         random_number_, gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n) FOR CASES GREATER THAN 0,
! nvec IS THE NUMBER OF EIGENVECTORS, WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000, nvec=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of trid_inviter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, tmp, tmp2, ulp, &
                                               anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: eigval, resid2
    real(stnd), allocatable, dimension(:,:) :: a, eigvec, a2, d_e
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: i, mat_type
!
    logical(lgl)  :: failure, failure2, do_test
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGENVALUES AND, OPTIONALLY, SELECTED EIGENVECTORS OF
!               A REAL SYMMETRIC MATRIX USING THE INVERSE ITERATION METHOD.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 3_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,nvec), eigval(n), d_e(n,2_i4b), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            eigval(:neig0-1_i4b) = one
            eigval(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                eigval(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( eigval(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( eigval ) ) then
!
            if ( .not.all( ieee_is_normal( eigval(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( eigval(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( eigval(:neig0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), resid2(nvec), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST, COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e WITH
!   SUBROUTINE eigval_cmp.
!
    call eigval_cmp( a, eigval, failure=failure, sort=sort, d_e=d_e )
!
!   ON EXIT OF eigval_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS USED TO REDUCE a
!       TO TRIDIAGONAL FORM IF THE OPTIONAL PARAMETER d_e IS PRESENT, OTHERWISE
!       a IS DESTROYED.
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!       d_e IS AN OPTIONAL ARGUMENT TO SAVE THE INTERMEDIATE TRIDIAGONAL FORM OF a.
!
!   NEXT COMPUTE THE FIRST nvec EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!   maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION.
!
!   ON ENTRY OF SUBROUTINE trid_inviter, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_inviter( d_e(:n,1), d_e(:n,2), eigval(:nvec), eigvec(:n,:nvec), failure=failure2,   &
                       mat=a, maxiter=maxiter )
!
!   ON EXIT OF trid_inviter :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE nvec EIGENVECTORS ASSOCIATED WITH THE EIGENVALUES eigval(:nvec).
!
!       trid_inviter MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!       IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        a(:n,:nvec) = matmul( a2, eigvec ) - eigvec*spread( eigval(:nvec), 1, n )
        resid2(:nvec) = norm( a(:n,:nvec), dim=2_i4b )
!
        err1 =  maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:nvec,:nvec) )
!
        a2(:nvec,:nvec) = abs( a(:nvec,:nvec) - matmul( transpose( eigvec ), eigvec ) )
!
        err2 = maxval(a2(:nvec,:nvec))/real(n,stnd)
!
        err = max( err1, err2 )
!
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, eigvec, eigval, d_e, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, eigvec, eigval, d_e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp()   ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', nvec, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_trid_inviter
! ===============================
!
end program ex1_trid_inviter

ex1_trid_inviter_bis.F90

program ex1_trid_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_INVITER
!   in module Eig_Procedures for computing all or selected eigenvectors of a
!   real symmetric matrix by inverse iterations.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines SYMTRID_CMP and SYMTRID_BISECT
!    in module EIG_Procedures.
!                                                                              
! LATEST REVISION : 28/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, c1_e6,           &
                         safmin, symtrid_cmp, symtrid_bisect, trid_inviter, norm, unit_matrix,     &
                         random_seed_, random_number_, gen_random_sym_mat, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX,
! neig0 IS THE NUMERICAL RANK OF THE GENERATED SYMMETRIC MATRIX (WHICH MUST BE LESS OR EQUAL TO n) FOR CASES GREATER THAN 0,
! nvec IS THE NUMBER OF EIGENVALUES AND EIGENVECTORS, WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig0=3000, nvec=3000, maxiter=2
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7,
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 1 of trid_inviter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, safmin, abstol, tmp, tmp2, &
                                               ulp, anorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: eigval, resid2, d, e
    real(stnd), allocatable, dimension(:,:) :: a, eigvec, a2
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: i, mat_type, neig
!
    logical(lgl)  :: failure, failure2, do_test
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : SELECTED EIGENVALUES BY A BISECION METHOD AND, OPTIONALLY, ASSOCIATED EIGENVECTORS
!               OF A REAL SYMMETRIC MATRIX USING THE INVERSE ITERATION METHOD.
!
!   SPECIFY THE TYPE OF INPUT SYMMETRIC MATRIX:
!
!   mat_type < 1  -> RANDOM SYMMETRIC MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF EIGENVALUES
!   mat_type = 2  -> FAST DECAY OF EIGENVALUES
!   mat_type = 3  -> S-SHAPED DECAY OF EIGENVALUES
!   mat_type = 4  -> VERY SLOW DECAY OF EIGENVALUES
!   mat_type = 5  -> STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF EIGENVALUES
!
    mat_type = 2_i4b
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,nvec), eigval(n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM SYMMETRIC MATRIX OR EIGENVALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM SYMMETRIC MATRIX.
!
            call random_number_( a )
!
            a = a + transpose( a )
!
!           COMPUTE THE FROBENIUS NORM OF THE RANDOM SYMMETRIC MATRIX.
!
            anorm = norm( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF EIGENVALUES.
!
            do i = 1_i4b, neig0
!
                tmp = real( i, stnd )
!
                eigval(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF EIGENVALUES.
!
            tmp = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED EIGENVALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK SYESVD ROUTINE.
!
            eigval(:neig0-1_i4b) = one
            eigval(neig0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
!
            do i = 1_i4b, neig0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                eigval(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF EIGENVALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK SYESVD ROUTINE.
!
            tmp  = real( neig0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, neig0
!
                eigval(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF EIGENVALUES.
!
            call random_number_( eigval(:neig0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( eigval ) ) then
!
            if ( .not.all( ieee_is_normal( eigval(:neig0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input symmetric matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       GENERATE A n-BY-n RANDOM REAL SYMMETRIC MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF EIGENVALUES AND A RANK EQUAL TO neig0.
!
        call gen_random_sym_mat( eigval(:neig0), a )
!
!       COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( eigval(:neig0) )
!
    end if
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), resid2(nvec), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST REDUCE THE SYMMETRIC MATRIX TO SYMMETRIC TRIDIAGONAL FORM BY ORTHOGONAL
!   TRANSFORMATIONS WITH SUBROUTINE symtrid_cmp. THE ORTHOGONAL TRANSFORMATIONS
!   ARE SAVED IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE.
!
    call symtrid_cmp( a(:n,:n), d(:n), e(:n), store_q=true )
!
!   ON EXIT OF symtrid_cmp:
!
!   a IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS USED TO REDUCE a
!   TO TRIDIAGONAL FORM IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE,
!   OTHERWISE a IS DESTROYED.
!   ARGUMENTS d and e CONTAIN, RESPECTIVELY THE DIAGONAL AND OFF-DIAGONAL
!   ELEMENTS OF THE TRIDIAGONAL MATRIX.
!
!   SECOND, COMPUTE THE nvec LARGEST  EIGENVALUES OF THE SELF-ADJOINT MATRIX a TO HIGH
!   ACCURACY WITH SUBROUTINE symtrid_bisect.
!
    call symtrid_bisect( d(:n), e(:n), neig, eigval(:n), failure, &
                         sort=sort, abstol=abstol, le=nvec )
!
!   ON EXIT OF symtrid_bisect:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   NEXT COMPUTE THE FIRST nvec EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!   maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX (STORED
!   IN VECTORS d AND e) AND BACK-TRANSFORMATION.
!
!   ON ENTRY OF SUBROUTINE trid_inviter, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_inviter( d(:n), e(:n), eigval(:nvec), eigvec(:n,:nvec), failure=failure2,   &
                       mat=a, maxiter=maxiter )
!
!   ON EXIT OF trid_inviter :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE nvec EIGENVECTORS OF a ASSOCIATED WITH THE EIGENVALUES eigval(:nvec).
!
!       trid_inviter MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!       IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        a(:n,:nvec) = matmul( a2, eigvec ) - eigvec*spread( eigval(:nvec), 1, n )
        resid2(:nvec) = norm( a(:n,:nvec), dim=2_i4b )
!
        err1 =  maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:nvec,:nvec) )
!
        a2(:nvec,:nvec) = abs( a(:nvec,:nvec) - matmul( transpose( eigvec ), eigvec ) )
!
        err2 = maxval(a2(:nvec,:nvec))/real(n,stnd)
!
        err = max( err1, err2 )
!
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, eigvec, eigval, d, e, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, eigvec, eigval, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random symmetric matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of eigenvalues'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered eigenvalues at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of eigenvalues'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK SYESVD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of eigenvalues'
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', nvec, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_trid_inviter
! ===============================
!
end program ex1_trid_inviter

ex1_ts_id_cmp.F90

program ex1_ts_id_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines TS_ID_CMP in 
!   module Random and ORTHO_GEN_QR in module QR_Procedures for computing a two-sided
!   (randomized or deterministic) interpolative decomposition.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c50, ts_id_cmp,  &
                         ortho_gen_qr, norm, merror, allocate_error, gen_random_mat,          &
                         random_seed_
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX;
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX;
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX;
! nid IS THE TARGET RANK OF THE TWO SIDED INTERPOLATIVE DECOMPOSITION, WHICH IS SOUGHT.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, nsvd0=2000, nid=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 1 of ts_id_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err4, err5, tmp, norma, normr, &
                                               eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, singval0
    real(stnd), allocatable, dimension(:,:) :: a, w, v, skela, skelav, resid
!
    integer(i4b)                            :: i, blk_size, nover, mat_type
    integer(i4b), allocatable, dimension(:) :: ip_row, ip_col
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, random_qr
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE A (RANDOMIZED OR DETERMINISTIC) TWO SIDED
!               INTERPOLATIVE DECOMPOSITION (ID) OF A DATA MATRIX.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type > 3  -> VERY SLOW DECAY OF SINGULAR VALUES
!
    mat_type = 3_i4b
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE RESULTS OF THE SUBROUTINE.
!
    do_test = true
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   DETERMINE PARAMETERS OF THE TWO-SIDED INTERPOLATIVE DECOMPOSITION (ID) ALGORITHM.
!
!   SET TOLERANCE FOR CHECKING THE RANK OF THE TWO-SIDED ID APPROXIMATION IN THE SUBROUTINE.
!
    tol = eps
!
!   SPECIFY IF A RANDOMIZED OR DETERMINISTIC TWO SIDED ID ALGORITHM IS USED.
!
    random_qr = true
!
!   DETERMINE THE BLOCKSIZE PARAMETER IN THE RANDOMIZED TWO SIDED ID ALGORITHM.
!
    blk_size = 20_i4b
!
!   DETERMINE THE OVERSAMPLING SIZE PARAMETER IN THE RANDOMIZED TWO SIDED ID ALGORITHM.
!
    nover = 10_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        i = max( m, n )
!
        allocate( a(m,i), diagr(nid), beta(nid), ip_row(m), ip_col(n), singval0(nsvd0),   &
                  skela(nid,nid), w(m,nid), v(nid,n), skelav(nid,n), resid(m,i), stat=iok )
!
    else
!
        allocate( a(m,n), diagr(nid), beta(nid), ip_row(m), ip_col(n), singval0(nsvd0),  &
                  skela(nid,nid), w(m,nid), v(nid,n), stat=iok )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!   OF SINGULAR VALUES.
!
!   GENERATE SINGULAR VALUES.
!
    select case( mat_type )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                singval0(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case default
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            norma = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i - 1_i4b, stnd )
!
                singval0(i) = exp( -tmp/norma )
!        
            end do
!
    end select
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A m-BY-n MATRIX a OF RANK nsvd0 .
!
    call gen_random_mat( singval0(:nsvd0), a )
!
!   SAVE THE INPUT MATRIX FOR LATER USE IF REQUIRED.
!
    if ( do_test ) then
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   COMPUTE THE FROBENIUS NORM OF THE MATRIX.
!
!    norma = norm( a(:m,:n) )
    norma = sqrt(sum( singval0(:nsvd0)**2 ) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A (RANDOMIZED OR DETERMINISTIC) TWO SIDED ID DECOMPOSITION OF A DATA MATRIX a
!   WITH SUBROUTINE ts_id_cmp. THE RANK OF THE TWO SIDED ID DECOMPOSITION IS
!   DETERMINED BY THE NUMBER OF ROWS (AND COLUMNS) OF THE ARRAY ARGUMENT skela,
!   nid = size(skela,1) = size(skela,2) .
!
    call ts_id_cmp( a(:m,:n), ip_row(:m), ip_col(:n), w(:m,:nid), v(:nid,:n), skela(:nid,:nid), &
                    diagr=diagr(:nid), beta=beta(:nid), rnorm=normr, tol=tol,                   &
                    random_qr=random_qr, blk_size=blk_size, nover=nover )
!
!   THE ROUTINE COMPUTES A (RANDOMIZED OR DETERMINISTIC) TWO SIDED ID DECOMPOSITION OF a AS:
!
!                       a ≈ w * skela * v
!
!   WHERE w IS A m-BY-nid MATRIX, skela IS A nid-BY-nid SQUARED MATRIX, WHICH CONSISTS OF A SUBMATRIX
!   OF a AND DEFINED THE SO_CALLED SKELETON OF a, AND v IS A nid-BY-n MATRIX. THE w, skela AND v MATRICES
!   ARE ESTIMATED TO MINIMIZE THE ERROR OF THE TWO SIDED ID DECOMPOSITION.
!
!   SUCH TWO SIDED ID DECOMPOSITION CAN BE COMPUTED EFFICIENTLY WITH THE HELP OF A (RANDOMIZED
!   OR DETERMINISTIC) PARTIAL QR DECOMPOSITION WITH COLUMN PIVOTING OF a AND A RANDOMIZED
!   OR DETERMINISTIC) COMPLETE QR DECOMPOSITION WITH COLUMN PIVOTING OF A MATRIX DERIVED FROM
!   THE QR DECOMPOSITION OF a.
!
!   MORE PRECISELY, A (RANDOMIZED OR DETERMINISTIC) PARTIAL QR DECOMPOSITION OF a IS FIRST COMPUTED AS:
!
!                     a * P ≈ Q * R = Q * [ R11  R12 ]
!
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-nid MATRIX WITH ORTHOGONAL COLUMNS,
!   R IS A nid-BY-n UPPER OR TRAPEZOIDAL MATRIX AND R11 IS A nid-BY-nid UPPER TRIANGULAR MATRIX.
!
!   THIS LEADS TO THE COLUMN ID DECOMPOSITION OF a AS:
!
!                     a ≈ C * v    WITH  C = Q * R11  AND  v = [ I  inv(R11)*R12 ] * P'
!
!   WHERE C IS A m-BY-nid MATRIX, WHICH CONSISTS OF A SUBSET OF nid COLUMNS OF a,
!   v IS A nid-BY-n MATRIX AND I IS THE IDENTITY MATRIX OF ORDER nid.
!
!   IN A SECOND STEP, IF WE PERFORMED A COMPLETE (RANDOMIZED OR DETERMINISTIC) COLUMN ID
!   DECOMPOSITION OF C' (E.G., A ROW ID DECOMPOSITION OF C) AS:
!
!                   C' = skela' * w'
!
!   WHERE skela IS A nid-BY-nid SQUARED MATRIX (WHICH IS A SUBMATRIX OF a) AND w IS A
!   m-BY-nid MATRIX, THIS GIVES THE DESIRED TWO SIDED ID DECOMPOSITION OF a AS:
!
!                       a ≈ w * skela * v
!
!   AND THE FROBENIUS NORM OF THE ERROR OF THIS TWO SIDED ID DECOMPOSITION OF a IS THE SAME
!   AS THAT OF THE PARTIAL QR DECOMPOSITION OF a OR ITS COLUMN ID DECOMPOSITION.
!
!   ON EXIT OF id_cmp, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS PARTIAL QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:nid) AND THE ARRAY
!     beta(:nid) STORES Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a(:nid,:n) CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   - ip_col STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip_col(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip_col AS FOLLOWS:
!     IF ip_col(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - ip_row STORES THE PERMUTATION MATRIX N IN THE QR DECOMPOSITION OF C'.
!     IF ip_row(j)=k, THEN THE jTH ROW OF N*a WAS THE kTH ROW OF a.
!     THE MATRIX N IS REPRESENTED IN THE ARRAY ip_row AS FOLLOWS:
!     IF ip_row(j) = i THEN THE jTH row OF N IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R11, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE PARTIAL QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE NUMERICAL RANK OF R11 AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ THE CALCULATIONS TO DETERMINE THE
!       CONDITION NUMBER OF R11 ARE NOT PERFORMED AND THE RANK OF R11 IS ASSUMED TO
!       BE EQUAL TO nid.
!
!   THE SUBROUTINE WILL EXIT WITH AN ERROR MESSAGE IF THE RANK OF R11 IS LESS THAN nid.
!
!   IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS) AND THAT R11 IS SINGULAR, THEN THE LINEARLY
!   DEPENDENT COLUMNS CAN USUALLY BE EXCLUDED FROM THE QR (AND ID) APPROXIMATION AND
!   THE RANK OF R11 CAN BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!
!   FINALLY, NOTE THAT THE SQUARED MATRIX skela IS DEFINED AS:
!
!                 skela(:nid,:nid) = a(ip_row(:nid),ip_col(:nid))
!
!   WHERE ip_col IS AN INTEGER ARRAY STORING THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION
!   WITH COLUMN PIVOTING OF a AND ip_row  IS AN INTEGER ARRAY STORING THE PERMUTATION MATRIX N
!   IN THE QR DECOMPOSITION WITH COLUMN PIVOTING OF C'.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   COMPUTE RELATIVE ERROR OF THE TWO SIDED ID APPROXIMATION.
!
    err1 = normr/norma
!                
    if ( do_test ) then
!
!       CHECK COMPUTATION OF THE SKELETON OF MATRIX a .
!
        skelav(:nid,:nid) = skela(:nid,:nid) - resid(ip_row(:nid),ip_col(:nid))
!
        err2 = norm( skelav(:nid,:nid) )
!
!       CHECK ACCURACY OF THE FROBENIUS NORM OF THE RESIDUAL MATRIX.
!
        skelav(:nid,:n) = matmul( skela(:nid,:nid), v(:nid,:n) )
!
        resid(:m,:n) = resid(:m,:n) - matmul( w(:m,:nid), skelav(:nid,:n) )
!
        if ( normr<=one ) then
!
            err3 = abs( norm( resid(:m,:n) ) - normr )
!
        else
!
            err3 = abs( norm( resid(:m,:n) )/normr - one )
!
        end if
!
!       GENERATE ORTHOGONAL MATRIX Q OF PARTIAL QR DECOMPOSITION OF DATA MATRIX a .
!
        call ortho_gen_qr( a(:m,:m), beta(:nid) )
!
!       HERE ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS THE PRODUCT OF nid ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(nid)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp, partial_rqr_cmp2
!       partial_rtqr_cmp, id_cmp AND ts_id_cmp SUBROUTINES.
!
!       THE SIZE OF beta DETERMINES THE NUMBER OF ELEMENTARY REFLECTORS WHOSE PRODUCT
!       DEFINES THE MATRIX Q.
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION w(:m,:nid) - Q(:m,:nid)*(Q(:m,:nid)'*w(:m,:nid)).
!
        skelav(:nid,:nid) = matmul( transpose(a(:m,:nid)), w(:m,:nid) )
!
        resid(:m,:nid) = abs( w(:m,:nid) - matmul( a(:m,:nid), skelav(:nid,:nid) ) )
!
        err4 = maxval( resid(:m,:nid) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
!        call unit_matrix( resid(:m,:m) )
!
!        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
!        err4 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF w(:m,:nid) AND ITS ORTHOGONAL COMPLEMENT Q(:m,nid+1:m).
!
        if ( m>nid ) then
!
            resid(:nid,nid+1:m) = matmul( transpose(w(:m,:nid)), a(:m,nid+1:m) )
!
            err5 = maxval( abs( resid(:nid,nid+1:m) ) )/real(m,stnd)
!
        else
!
            err5 = zero
!
        end if
!
        err = max( err2, err3, err4, err5 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resid, skelav )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip_row, ip_col, singval0, w, v, skela )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type > 3  -> very slow decay of singular values'
!
    write (prtunit,*) 
    write (prtunit,*) 'Rank of the two sided ID approximation     &
                      &                                      = ', nid
!        
    write (prtunit,*) 'Relative error of the two sided ID decomposition &
                      &||A - W*SKELA*V||_F/||A||_F     = ', err1
!
    if ( do_test ) then
!        
        write (prtunit,*) 'Accuracy of the range of the two sided ID &
                          &approximation                          = ', err4
!
        if ( m>nid ) then
            write (prtunit,*) 'Orthogonality of the range of the ID approximation&
                              & and its orthogonal complement = ', err5
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing a (randomized) two sided ID decomposition of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex1_ts_id_cmp
! ============================
!
end program ex1_ts_id_cmp

ex1_ymd_to_daynum.F90

program ex1_ymd_to_daynum
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function YMD_TO_DAYNUM
!   in module Time_Procedures for computing the time interval between two dates.
!
!   See also program ex1_daynum_to_ymd.f90 and W. Kahan webpage
!   (https://people.eecs.berkeley.edu/~wkahan/) for more information.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, ymd_to_daynum, get_date
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr, imon, iday, julday, iyr2, imon2, iday2, julday2
!
    character(len=14) :: date, date2
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of ymd_to_daynum'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A DATE.
!
    iyr  = 1902
    imon = 11
    iday = 15
!
!   GENERATE ANOTHER DATE.
!
    iyr2  = 1982
    imon2 = 10
    iday2 = 22
!
!   CONVERTS GREGORIAN YEAR (iyr), MONTH (imon) AND DAY (iday) TO JULIAN DAY
!   NUMBER.
!
    julday = ymd_to_daynum( iyr, imon, iday )
!
!   FUNCTION ymd_to_daynum CONVERTS THE THREE INTEGERS iyr, imon AND iday STANDING FOR
!   YEAR, MONTH, DAY IN THE GREGORIAN CALENDAR PROMULGATED BY GREGORY XIII ON
!   FRIDAY, 15 OCTOBER 1582, IN THE CORRESPONDING JULIAN DAY NUMBER STARTING
!   WITH ymd_to_daynum=1 ON FRIDAY, 15 OCTOBER 1582.
!
!   NOTE THE GREGORIAN CALENDAR WAS ADOPTED IN OCT. 15, 1582, AND HENCE
!   THIS FUNCTION WILL NOT WORK PROPERLY FOR DATES EARLY THAN 10-15-1582.
!   
!   CONVERTS GREGORIAN YEAR (iyr2), MONTH (imon2) AND DAY (iday2) TO JULIAN DAY
!   NUMBER.
!
    julday2 = ymd_to_daynum( iyr2, imon2, iday2 )
!
!   THE NUMBER OF DAYS BETWEEN TWO DATES IS THE DIFFERENCE BETWEEN THEIR
!   JULIAN DAYS. SO, ONE OF THE MOST USEFUL APPLICATIONS FOR THIS ROUTINE
!   IS TO COMPUTE THE NUMBER OF DAYS BETWEEN TWO DATES.
!
    call get_date( iyr, imon, iday, date )
    call get_date( iyr2, imon2, iday2, date2 )
!
    write (prtunit,*)      &
      'The number of days between ' // date2 // ' and ' // date // ' is ', julday2-julday
!
!
! END OF PROGRAM ex1_ymd_to_daynum
! ================================
!
end program ex1_ymd_to_daynum

ex1_ymd_to_dayweek.F90

program ex1_ymd_to_dayweek
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of function YMD_TO_DAYWEEK
!   in module Time_Procedures to determine the day of the week from a given date.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, ymd_to_dayweek, days, get_date
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
!
    integer(i4b), parameter :: prtunit=6
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    integer(i4b) :: iyr, imon, iday, idayweek
!
    character(len=11) :: date
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 1 of ymd_to_dayweek'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE A DATE.
!
    iyr  = 2023
    imon = 12
    iday = 4
!
!   DETERMINE THE DAY OF THE WEEK FROM FROM GREGORIAN YEAR (iyr),
!   MONTH (imon) AND DAY (iday).
!
    idayweek = ymd_to_dayweek( iyr, imon, iday )
!
!   FUNCTION ymd_to_dayweek RETURNS THE DAY OF THE WEEK (E.G., MON, TUE,...) AS AN INTEGER 
!   INDEX (MON=1 TO SUN=7) FOR THE GIVEN YEAR, MONTH, AND DAY IN THE GREGORIAN
!   CALENDAR PROMULGATED BY GREGORY XIII ON FRIDAY, 15 OCTOBER 1582.
!
!   NOTE THAT THE GREGORIAN CALENDAR WAS ADOPTED IN OCT. 15, 1582, AND HENCE
!   THIS ALGORITHM WILL NOT WORK PROPERLY FOR DATES EARLY THAN 10-15-1582.
!
!   PRINT THE RESULT.
!
    call get_date( iyr, imon, iday, date )
!
    write (prtunit,*) 'The date ' // date // ' is a ' // days(idayweek)
!
!
! END OF PROGRAM ex1_ymd_to_dayweek
! ================================
!
end program ex1_ymd_to_dayweek

ex2_bd_deflate2.F90

program ex2_bd_deflate2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_DEFLATE2
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by a (Godunov) deflation technique.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP2 and BD_SINGVAL2 in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix by the Ralha-Barlow one-sided
!    bidiagonalisation algorithm and all or selected singular values of a real bidiagonal matrix
!    by a bisection method.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, safmin, bd_cmp2, bd_singval2,  &
                         bd_deflate2, norm, unit_matrix, c50, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, nsing=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of bd_deflate2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e
    real(stnd), allocatable, dimension(:,:) :: a, a2, p, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: ns, max_qr_steps
!
    logical(lgl) :: failure1, failure2, failure3, ortho, do_test, gen_p
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING
!               THE RALHA-BARLOW ONE-SIDED BIDIAGONALISATION ALGORITHM, 
!               A BISECTION ALGORITHM FOR SINGULAR VALUES AND THE GODUNOV
!               DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps    = fudge*epsilon( err )
    err    = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RALHA-BARLOW ONE-SIDED BIDIAGONALISATION ALGORITHM.
!
    gen_p = false
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   CHOOSE TUNING PARAMETERS FOR THE DEFLATION ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!   EXCEEDS max_qr_steps * nsing.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), p(m,m), leftvec(n,nsing), rightvec(m,nsing),      &
              s(m), d(m), e(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX.
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES
!   OF a AND nsing LEFT AND RIGHT SINGULAR VECTORS OF a IN THREE STEPS:
!
!   STEP1 : CALL bd_cmp2 TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!               a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER BIDIAGONAL MATRIX.
!   bd_cmp2 USES THE ONE-SIDED RALHA-BARLOW ALGORITM AND IS FASTER THAN
!   THE STANDARD BIDIAGONALIZATION ALGORITHM USED IN bd_cmp. HOWEVER, A
!   A SLIGHT LOSS OF ORTHOGONALITY CAN BE EXPECTED FOR Q WHEN a IS NEARLY
!   SINGULAR SINCE Q IS COMPUTED FROM A RECURRENCE RELATIONSHIP.
!
    call bd_cmp2( a(:n,:m), d(:m), e(:m), p(:m,:m), failure=failure1, gen_p=gen_p )
!
!   ON OUTPUT OF bd_cmp2:
!
!       a CONTAINS THE FIRST min(m,n) COLUMNS OF Q
!       AND p CONTAINS THE ORTHOGONAL MATRIX P IN FACTORED FORM IF gen_p=false
!       OR IN EXPLICIT FORM IF gen_p=true.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!       failure= false :  INDICATES THAT MAXIMUM ACCURACY WAS OBTAINED.
!       failure= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS
!                         OF ORTHOGONALITY FOR Q CAN BE EXPECTED.
!
!   STEP2 : COMPUTE SINGULAR VALUES OF THE BIDIAGONAL MATRIX (WHICH ARE ALSO THE
!   SINGULAR VALUES OF a) WITH SUBROUTINE bd_singval2 TO HIGH RELATIVE PRECISION.
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d').
!
!   THE OPTIONAL ARGUMENT abstol OF bd_singval2 MAY BE USED TO INDICATE THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES. A SINGULAR VALUE IS CONSIDERED TO BE LOCATED
!   IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt( safmin ) ).
!
    call bd_singval2( d(:m), e(:m), ns, s(:m), failure=failure2, sort=sort, abstol=abstol )
!
!   ON EXIT OF bd_singval2 :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT
!         failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND THAT
!                           FULL  ACCURACY WAS NOT ATTAINED IN COMPUTING THE SINGULAR
!                           VALUES OF AN INTERMEDIATE BIDIAGONAL FORM BD OF a.
!
!         s IS OVERWRITTEN WITH THE SINGULAR VALUES OF THE BIDIAGONAL FORM BD OF a.
!
!         IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!         IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   HERE, SORT = 'd' IS USED. THIS IS REQUIRED FOR THE USE OF bd_inviter2.
!
!   STEP3 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
!   ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!   MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    call bd_deflate2( a(:n,:m), p(:m,:m), d(:m), e(:m), s(:nsing),                 &
                      leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure3,   &
                      ortho=ortho, max_qr_steps=max_qr_steps                       )
!
!   ON EXIT OF bd_deflate2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                         DEFLATION ALGORITHM.

!       leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!       SINGULAR VECTORS OF a .
!
!   bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
!
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( a2(:nsing,1_i4b) )/ ( sum( abs(s(:m)) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, leftvec, rightvec, s, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure2 .and. .not.failure3 ) then
!    if ( err<=eps .and. .not.failure1 .and. .not.failure2 .and. .not.failure3 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_cmp2() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_singval2() ) = ', failure2
    write (prtunit,*) ' FAILURE ( from bd_deflate2() ) = ', failure3
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_bd_deflate2
! ==============================
!
end program ex2_bd_deflate2

ex2_bd_inviter.F90

program ex2_bd_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_INVITER
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real bidiagonal matrix by inverse iterations.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP2 and BD_SVD in module SVD_Procedures
!    and subroutine APPLY_Q_QR in module QR_Procedures for computing a bidiagonal reduction
!    of a real matrix by the Ralha-Barlow one-sided bidiagonalisation algorithm and
!    all singular values of a real bidiagonal matrix by the bidiagonal QR SVD method
!    with implicit shift.
!
!
! LATEST REVISION : 07/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, c100, bd_inviter, bd_svd, &
                         bd_cmp2, unit_matrix, norm, merror, allocate_error, apply_q_qr
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, m=6000, n=3000, nsing=3000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 2 of bd_inviter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, sup, singval
    real(stnd), allocatable, dimension(:,:) :: a, a2, p, resid, leftvec, rightvec, leftvec0, rightvec0
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure1, failure2, failure3, bd_is_upper, gen_p, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A REAL m-BY-n MATRIX USING THE Ralha-Barlow ONE_SIDED ALGORITHM,
!               THE GOLUB-REINSCH ALGORITHM FOR ALL SINGULAR VALUES AND THE INVERSE ITERATION
!               TECHNIQUE FOR SELECTED SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RALHA-BARLOW ONE-SIDED BIDIAGONALISATION ALGORITHM.
!
    gen_p = false
!
    bd_is_upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    if ( gen_p ) then
!
        allocate( a(m,n), p(n,n), d(n), e(n), singval(n),           &
                  sup(n), leftvec(m,nsing), rightvec(n,nsing),      &
                  leftvec0(n,nsing), rightvec0(n,nsing), stat=iok   )
!
    else
!
        allocate( a(m,n), p(n,n), d(n), e(n), singval(n),           &
                  sup(n), leftvec(m,nsing), rightvec(n,nsing),      &
                  leftvec0(n,nsing), stat=iok   )
!
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
    if ( do_test ) then
!
        allocate( a2(m,n), resid(m,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE DATA MATRIX.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   FIRST CALL bd_cmp2 TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!               a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER BIDIAGONAL MATRIX.
!   bd_cmp2 USES THE ONE_SIDED RHALA-BARLOW ALGORITM AND IS FASTER THAN
!   THE STANDARD BIDIAGONALIZATION ALGORITHM USED IN bd_cmp. HOWEVER, A
!   A SLIGHT LOSS OF ORTHOGONALITY CAN BE EXPECTED FOR Q SINCE Q IS
!   COMPUTED FROM A RECURRENCE RELATIONSHIP.
!
    call bd_cmp2( a, d, e, p, failure=failure1, gen_p=gen_p )
!
!   ON OUTPUT OF bd_cmp2:
!
!       a CONTAINS THE FIRST min(n,m) COLUMNS OF Q
!       AND p CONTAINS THE ORTHOGONAL MATRIX P IN FACTORED FORM IF gen_p=false
!       OR IN EXPLICIT FORM IF gen_p=true.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS
!                         OF ORTHOGONALITY FOR Q CAN BE EXPECTED.
!
!   NEXT COMPUTE ALL SINGULAR VALUES OF THE BIDIAGONAL MATRIX BD .
!   THE SINGULAR VALUES ARE STORED IN singval IN DECREASING ORDER (sort='d').
!
    singval(:n) = d(:n)
    sup(:n)     = e(:n)
!
    call bd_svd( bd_is_upper, singval(:n), sup(:n), failure=failure2, sort=sort  )
!
!   ON EXIT OF bd_svd :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE BIDIAGONAL QR ALGORITHM.
!
!   NOW COMPUTE THE FIRST nsing SINGULAR VECTORS OF BD BY maxiter INVERSE ITERATIONS WITH
!   SUBROUTINE bd_inviter. THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    if ( gen_p ) then
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), singval(:nsing), leftvec0(:n,:nsing), rightvec0(:n,:nsing),  &
                         failure=failure3, maxiter=maxiter )
!
    else
!
        call bd_inviter( bd_is_upper, d(:n), e(:n), singval(:nsing), leftvec0(:n,:nsing), rightvec(:n,:nsing),  &
                         failure=failure3, maxiter=maxiter )
!
    end if
!
!   ON EXIT OF bd_inviter :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM.
!
!         leftvec AND rightvec CONTAIN, RESPECTIVELY, THE nsing LEFT AND RIGHT SINGULAR
!         VECTORS OF THE BIDIAGONAL MATRIX BD ASSOCIATED WITH THE SINGULAR VALUES singval(:nsing).
!
!   bd_inviter MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER singval ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL BIDIAGONAL MATRICES.
!
!   FINALLY COMPUTE SINGULAR VECTORS OF THE ORIGINAL MATRIX BY MULTIPLICATION OR BACK-TRANSFORMATION.
!
    if ( gen_p ) then
!
        rightvec(:n,:nsing) = matmul( p(:n,:n), rightvec0(:n,:nsing) )
!
    else
!
        call apply_q_qr( p(2_i4b:n,2_i4b:n), p(2_i4b:n,1_i4b), rightvec(2_i4b:n,:nsing),  &
                         left=true, trans=false  )
!
    end if
!
    leftvec(:m,:nsing) = matmul( a(:m,:n), leftvec0(:n,:nsing) )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:n,:nsing) - U(:m,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:m,:nsing) = matmul(a2,rightvec) - leftvec*spread(singval(:nsing),dim=1,ncopies=m)
        a2(:nsing,1_i4b) = norm( resid(:m,:nsing), dim=2_i4b )
!
        err1 = maxval( a2(:nsing,1_i4b) )/( sum( abs(singval(:n)) )*real(m,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( gen_p ) then
!
        deallocate( a, p, leftvec, rightvec, leftvec0, rightvec0, singval, d, e, sup )
!
    else
!
        deallocate( a, p, leftvec, rightvec, leftvec0, singval, d, e, sup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure2 .and. .not.failure3 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_cmp2() )    = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_svd() )     = ', failure2
    write (prtunit,*) ' FAILURE ( from bd_inviter() ) = ', failure3
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_bd_inviter
! =============================
!
end program ex2_bd_inviter

ex2_bd_inviter2.F90

program ex2_bd_inviter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_INVITER2
!   in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by inverse iterations.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP2 and BD_SVD in module SVD_Procedures
!    for computing a bidiagonal reduction of a real matrix by the Ralha-Barlow one-sided
!    bidiagonalisation algorithm and all singular values of a real bidiagonal matrix
!    by the bidiagonal QR SVD method with implicit shift.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, true, false, bd_inviter2, bd_cmp2, bd_svd, &
                         norm, unit_matrix, c50, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, nsing=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of bd_inviter2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, e2
    real(stnd), allocatable, dimension(:,:) :: a, a2, p, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure1, failure2, failure3, do_test, gen_p
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : ALL SINGULAR VALUES AND nsing SINGULAR VECTORS OF A n-BY-m REAL MATRIX WITH n>=m
!               USING THE RALHA-BARLOW ONE-SIDED BIDIAGONALISATION ALGORITHM, THE BIDIAGONAL QR
!               ALGORITHM FOR THE SINGULAR VALUES AND THE INVERSE ITERATION METHOD FOR THE 
!               SINGULAR VECTORS (EG PARTIAL SVD DECOMPOSITION).
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE RALHA-BARLOW ONE-SIDED BIDIAGONALISATION ALGORITHM.
!
    gen_p = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), p(m,m), leftvec(n,nsing), rightvec(m,nsing),      &
              s(m), d(m), e(m), e2(m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF a AND
!   ONLY nsing LEFT AND RIGHT SINGULAR VECTORS OF a (EG A PARTIAL SVD DECOMPOSITION
!   OF a) IN THREE STEPS:
!
!   STEP1 : CALL bd_cmp2 TO REDUCE THE MATRIX a TO BIDIAGONAL FORM
!
!               a = Q*BD*P**(t)
!
!   WHERE Q AND P ARE ORTHOGONAL AND BD IS AN UPPER BIDIAGONAL MATRIX.
!   bd_cmp2 USES THE ONE-SIDED RHALA-BARLOW ALGORITM AND IS FASTER THAN
!   THE STANDARD BIDIAGONALIZATION ALGORITHM USED IN bd_cmp. HOWEVER, A
!   A SLIGHT LOSS OF ORTHOGONALITY CAN BE EXPECTED FOR Q WHEN a IS NEARLY
!   SINGULAR SINCE Q IS COMPUTED FROM A RECURRENCE RELATIONSHIP.
!
    call bd_cmp2( a(:n,:m), d(:m), e(:m), p(:m,:m), failure=failure1, gen_p=gen_p )
!
!   ON OUTPUT OF bd_cmp2:
!
!       a CONTAINS THE FIRST min(m,n) COLUMNS OF Q
!       AND p CONTAINS THE ORTHOGONAL MATRIX P IN FACTORED FORM IF gen_p=false
!       OR IN EXPLICIT FORM IF gen_p=true.
!
!       d AND e CONTAINS RESPECTIVELY, THE DIAGONAL AND
!       SUBDIAGONAL OF THE BIDIAGONAL MATRIX BD.
!
!       failure= false :  INDICATES THAT MAXIMUM ACCURACY WAS OBTAINED.
!       failure= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS
!                         OF ORTHOGONALITY FOR Q CAN BE EXPECTED.
!
!   STEP2 : COMPUTE ALL SINGULAR VALUES OF THE BIDIAGONAL MATRIX BD WITH SUBROUTINE bd_svd.
!
!   FIRST MAKE A COPY OF THE BIDIAGONAL MATRIX BD FOR LATER USE WITH bd_inviter2 SUBROUTINE.
!
    s(:m)  = d(:m)
    e2(:m) = e(:m)
!
    call bd_svd( true, s(:m), e2(:m), failure=failure2, sort=sort  )
!
!   ON EXIT OF bd_svd :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT
!         failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                           THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                           SVD OF AN INTERMEDIATE BIDIAGONAL FORM BD OF a.
!
!         s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!         IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!         IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   HERE, SORT = 'd' IS USED. THIS IS REQUIRED FOR THE USE OF bd_inviter2.
!
!   STEP3 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION
!   WITH SUBROUTINE bd_inviter2 .
!
!   ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!   THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call bd_inviter2( a, p, d, e, s(:nsing), leftvec, rightvec, failure=failure3, maxiter=maxiter )
!
!   ON EXIT OF bd_inviter2 :
!
!         failure= false :  INDICATES SUCCESSFUL EXIT.
!         failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE FOR SOME SINGULAR VECTORS.
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR ALL THE SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF NECESSARY. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED 
!   BY BACK TRANSFORMATIONS. ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT
!   SINGULAR VECTORS OF a, RESPECTIVELY.
!
!   NOTE THAT bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( a2(:nsing,1_i4b) )/( sum( abs(s(:n)) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, p, leftvec, rightvec, s, d, e, e2 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure2 .and. .not.failure3 ) then
!    if ( err<=eps .and. .not.failure1 .and. .not.failure2 .and. .not.failure3 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from bd_cmp2() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_svd() )      = ', failure2
    write (prtunit,*) ' FAILURE ( from bd_inviter2() ) = ', failure3
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_bd_inviter2
! ==============================
!
end program ex2_bd_inviter2

ex2_bd_singval.F90

program ex2_bd_singval
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SINGVAL
!   in module SVD_Procedures for computing all or selected singular values
!   of a real bidiagonal matrix by the bisection method. The singular values
!   can be computed at high relative precision at the user option.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP, APPLY_Q_BD, APPLY_P_BD
!    in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by inverse iterations.
!
!
! LATEST REVISION : 07/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, safmin, c50, bd_inviter,  &
                         bd_cmp, bd_singval, apply_q_bd, apply_p_bd, merror,          &
                         allocate_error, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! ls IS THE NUMBER OF SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=500, mn=min(m,n), ls=20, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of bd_singval'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, abstol
    real(stnd), dimension(n,m)              :: a, a2
    real(stnd), dimension(:,:), allocatable :: leftvec, rightvec
    real(stnd), dimension(mn)               :: s, d, e, tauq, taup
!
    integer(i4b) :: nsing
    integer      :: iok
!
    logical(lgl) :: failure, bd_is_upper, vector
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   COMPUTE THE SINGULAR VALUES WITH A VECTORIZED VERSION OF THE BISECTION
!   ALGORITHM.
!
    vector = true
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
!   SAVE RANDOM DATA MATRIX.
!
    a2(:n,:m) = a(:n,:m)
!
!   REDUCE a TO LOWER OR UPPER BIDIAGONAL FORM.
!
    call bd_cmp( a, d, e, tauq, taup )
!
!   COMPUTE THE FIRST ls SINGULAR VALUES OF BIDIAGONAL FORM OF a BY A BISECTION METHOD.
!
    call bd_singval( d, e, nsing, s, failure, sort=sort, vector=vector, ls=ls, abstol=abstol  )
!
    if ( .not. failure ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        bd_is_upper = n>=m
!
!       COMPUTE THE FIRST nsing SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call bd_inviter( bd_is_upper, d, e, s(:nsing), leftvec(:mn,:nsing), rightvec(:mn,:nsing),  &
                         failure, maxiter=maxiter )
!
!       COMPUTE SINGULAR VECTORS OF a BY BACK-TRANSFORMATION.
!
        if ( bd_is_upper ) then
            leftvec(mn+1_i4b:n,:nsing) = zero
        else
            rightvec(mn+1_i4b:m,:nsing) = zero
        end if
!
!       GENERATE LEFT SINGULAR VECTORS OF a IN leftvec BY BACK-TRANSFORMATION.
!
        call apply_q_bd( a, tauq, leftvec,  left=true, trans=false )
!
!       GENERATE RIGHT SINGULAR VECTORS OF a IN rightvec BY BACK-TRANSFORMATION.
!
        call apply_p_bd( a, taup, rightvec, left=true, trans=false )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*rightvec - leftvec*s(:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
        err =  norm( matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n) )/( norm( a2 )*real(mn,stnd) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( leftvec,  rightvec  )
!
    end if
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_bd_singval
! =============================
!
end program ex2_bd_singval

ex2_bd_singval2.F90

program ex2_bd_singval2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SINGVAL2
!   in module SVD_Procedures for computing all or selected singular values
!   of a real bidiagonal matrix by the bisection method. The singular values
!   can be computed at high relative precision at the user option.
!   BD_SINGVAL2 is faster, but less accurate than BD_SINGVAL subroutine.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines BD_CMP, APPLY_Q_BD, APPLY_P_BD
!    in module SVD_Procedures for computing all or selected singular vectors
!   of a real matrix by inverse iterations.
!
!
! LATEST REVISION : 07/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, safmin, c50, bd_inviter,  &
                         bd_cmp, bd_singval2, apply_q_bd, apply_p_bd, merror,         &
                         allocate_error, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! ls IS THE NUMBER OF SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=500, mn=min(m,n), ls=20, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of bd_singval2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, abstol
    real(stnd), dimension(n,m)              :: a, a2
    real(stnd), dimension(:,:), allocatable :: leftvec, rightvec
    real(stnd), dimension(mn)               :: s, d, e, tauq, taup
!
    integer(i4b) :: nsing
    integer      :: iok
!
    logical(lgl) :: failure, bd_is_upper, vector
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   COMPUTE THE SINGULAR VALUES WITH A VECTORIZED VERSION OF THE BISECTION
!   ALGORITHM.
!
    vector = true
!
!   GENERATE A RANDOM DATA MATRIX.
!
    call random_number( a )
!
!   SAVE RANDOM DATA MATRIX.
!
    a2(:n,:m) = a(:n,:m)
!
!   REDUCE a TO LOWER OR UPPER BIDIAGONAL FORM.
!
    call bd_cmp( a, d, e, tauq, taup )
!
!   COMPUTE THE FIRST ls SINGULAR VALUES OF BIDIAGONAL FORM OF a BY A BISECTION METHOD.
!
    call bd_singval2( d, e, nsing, s, failure, sort=sort, vector=vector, ls=ls, abstol=abstol )
!
    if ( .not. failure .and. nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        bd_is_upper = n>=m
!
!       COMPUTE THE FIRST nsing SINGULAR VECTORS OF a BY maxiter
!       INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call bd_inviter( bd_is_upper, d, e, s(:nsing), leftvec(:mn,:nsing), rightvec(:mn,:nsing),  &
                         failure, maxiter=maxiter )
!
!       COMPUTE SINGULAR VECTORS OF a BY BACK-TRANSFORMATION.
!
        if ( bd_is_upper ) then
            leftvec(mn+1_i4b:n,:nsing) = zero
        else
            rightvec(mn+1_i4b:m,:nsing) = zero
        end if
!
!       GENERATE LEFT SINGULAR VECTORS OF a IN leftvec BY BACK-TRANSFORMATION.
!
        call apply_q_bd( a, tauq, leftvec,  left=true, trans=false )
!
!       GENERATE RIGHT SINGULAR VECTORS OF a IN rightvec BY BACK-TRANSFORMATION.
!
        call apply_p_bd( a, taup, rightvec, left=true, trans=false )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*rightvec - leftvec*s(:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
        err =  norm( matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n) )/( norm( a2 )*real(mn,stnd) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( leftvec,  rightvec  )
!
    end if
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_bd_singval2
! ==============================
!
end program ex2_bd_singval2

ex2_bd_svd.F90

program ex2_bd_svd
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine BD_SVD
!   in module SVD_Procedures for computing a partial SVD decomposition of
!   a bidiagonal matrix.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_INVITER in module SVD_Procedures
!    for computing selected singular vectors of a bidiagonal matrix by inverse ierations.
!
!
! LATEST REVISION : 07/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, c50,     &
                         bd_inviter, bd_svd, unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE BIDIAGONAL MATRIX,
! nsing IS THE NUMBER OF THE LARGEST SINGULAR TRIPLETS WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP nsing SINGULAR VECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=500, nsing=10, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of bd_svd'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err3, err, eps
    real(stnd), dimension(n)                :: diag, sup, sup2, singval
    real(stnd), dimension(n,nsing)          :: leftvec, rightvec
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid
!
    integer      :: iok
!
    logical(lgl) :: failure, a_is_upper, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   GENERATE AN UPPER BIDIAGONAL TOEPLITZ MATRIX a .
!   THE DIAGONAL ELEMENTS ARE STORED IN diag .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup .
!
    a_is_upper   = true
    diag(:n)     = half
    sup(1_i4b)   = zero
    sup(2_i4b:n) = one
!
!   MAKE A COPY OF THE BIDIAGONAL MATRIX.
!
    singval(:n) = diag(:n)
    sup2(:n)    = sup(:n)
!
!    COMPUTE SINGULAR VALUES OF BIDIAGONAL MATRIX a .
!
     call bd_svd( a_is_upper, singval(:n), sup2(:n), failure, sort=sort  )
!
     if ( .not. failure ) then
!
!       COMPUTE THE FIRST nsing SINGULAR VECTORS OF a BY maxiter INVERSE ITERATIONS.
!
        call bd_inviter( a_is_upper, diag(:n), sup(:n), singval(:nsing), leftvec(:n,:nsing),  &
                        rightvec(:n,:nsing), failure, maxiter=maxiter )
!
        if ( do_test ) then
!
            allocate( a(nsing,nsing), a2(nsing,nsing), resid(n,nsing), stat=iok )
!
            if ( iok/=0 ) then
                call merror( name_proc//allocate_error )
            end if
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*rightvec - leftvec*singval(:nsing),
!           WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS.
!
            if ( a_is_upper ) then
!
                resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*rightvec                 + &
                                   eoshift( spread(sup(:n), dim=2,ncopies=nsing)*rightvec, shift=1 )        - &
                                   spread( singval(:nsing), dim=1, ncopies=n )*leftvec 

            else
!
                resid(:n,:nsing) = spread( diag(:n),        dim=2, ncopies=nsing )*leftvec                   + &
                                   eoshift( spread(sup(:n), dim=2,ncopies=nsing)*leftvec, shift=1 )          - &
                                   spread( singval(:nsing), dim=1, ncopies=n )*rightvec
!
            end if
!
            err1 = norm(resid)/(sum( singval(:n) )*real(n,stnd) )
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!           WHERE u are LEFT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX a.
!
            call unit_matrix( a )
!
            a2 = a - matmul( transpose( leftvec ), leftvec )
            err2 = norm(a2)/real(n,stnd)
!
!           CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v**(t)*v
!           WHERE v are RIGHT SINGULAR VECTORS OF THE BIDIAGONAL MATRIX a.
!
            a2 = a - matmul( transpose( rightvec ), rightvec )
            err3 = norm(a2)/real(n,stnd)
!
            err = max( err1, err2, err3 )
!
            deallocate( a, a2, resid )
!
        end if
!
    end if
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_bd_svd
! =========================
!
end program ex2_bd_svd

ex2_chol_cmp.F90

program ex2_chol_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines CHOL_CMP and CHOL_SOLVE
!   in module Lin_Procedures for computing a Cholesky decomposition of a real symmetric
!   positive-definite matrix and solving a linear system with such matrix as a coefficient
!   matrix and several right hand sides.
!
!
! LATEST REVISION : 12/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, true, false, chol_cmp, chol_solve,   &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED POSITIVE DEFINITE MATRIX
! AND m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX WHICH IS USED TO DERIVE THE POSITIVE
! DEFINITE MATRIX. m SHOULD BE GREATER OR EQUAL TO n. nrhs IS THE NUMBER OF RIGHT HAND SIDES.
!
    integer(i4b), parameter :: prtunit=6, n=4000, m=n+10, nrhs=4000
!
    character(len=*), parameter :: name_proc='Example 2 of chol_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, c, b, x, res
    real(stnd), dimension(:),   allocatable :: invdiag
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n SYMMETRIC DEFINITE POSITIVE MATRIX
!               AND SEVERAL RIGHT HAND-SIDES WITH THE CHOLESKY DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = false
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC
!   DEFINITE POSITIVE MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), b(n,nrhs), x(n,nrhs), invdiag(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM SYMMETRIC POSITIVE DEFINITE MATRIX a .
!
    call random_number( c )
!
    a = matmul( transpose(c), c )
!
!   GENERATE A n-by-nrhs RANDOM SOLUTION MATRIX x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-MATRIX PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION MATRIX FOR SYMMETRIC POSITIVE DEFINITE
!   SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE CHOLESKY DECOMPOSITION OF MATRIX a .
!   IF ON OUTPUT OF chol_cmp d1 IS DIFFERENT FROM ZERO
!   THEN THE SYMMETRIC LINEAR SYSTEM IS NOT SINGULAR
!   AND CAN BE SOLVED BY SUBROUTINE chol_solve.
!
    call chol_cmp( a, invdiag, d1, upper=upper )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT FROM chol_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of CHOL_CMP subroutine, d1=', d1
!
    else
!
        call chol_solve( a, invdiag, b, upper=upper )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1/=zero .and. do_test ) then
!
!       ALLOCATE WORK ARRAY .
!
        allocate( res(n,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n,:nrhs) = b(:n,:nrhs) - x(:n,:nrhs)
        err = maxval( norm(res, dim=2_i4b ) /    &
                      norm(x,  dim=2_i4b  ) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, x, invdiag, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, c, x, invdiag )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a real symmetric positive definite system of size ', &
       n, ' with ', nrhs, ' right hand side vectors is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_chol_cmp
! ===========================
!
end program ex2_chol_cmp

ex2_comp_cor.F90

program ex2_comp_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_COR
!   in module Mul_Stat_Procedures for computing univariate statistics and a correlation
!   matrix between two two-dimensional arrays (e.g., two sets of variables).
!
!   All the statistics and correlations are computed with only one-pass on the data
!   in one or several steps with a very efficient algorithm for large datasets, which
!   also allows out-of-core computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=50
!
    character(len=*), parameter :: name_proc='Example 2 of comp_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                    :: err_xstat, err_ystat, err_cor, xyn, eps
    real(stnd), dimension(n,m)    :: xycor1, xycor2
    real(stnd), dimension(m,2)    :: ystat1, ystat2
    real(stnd), dimension(n,2)    :: xstat1, xstat2
    real(stnd), dimension(n,p)    :: x
    real(stnd), dimension(m,p)    :: y
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM TWO-DIMENSIONAL ARRAY x .
!
    call random_number( x(:n,:p) )
!
!   GENERATE A RANDOM TWO-DIMENSIONAL ARRAY y .
!
    call random_number( y(:m,:p) )
!
!   FIRST COMPUTE THE CORRELATION MATRIX BETWEEN x AND y
!   FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor( x(:n,:p), y(:m,:p), first, last,                    &
                   xstat1(:n,:2), ystat1(:m,:2), xycor1(:n,:m), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat1(i,1) CONTAINS THE MEAN VALUES OF THE ARRAY SECTION x(i,:p).
!
!      xstat1(i,2) CONTAINS THE VARIANCES OF THE ARRAY SECTION x(i,:p).
!
!      ystat1(j,1) CONTAINS THE MEAN VALUE OF THE ARRAY SECTION y(j,:p).
!
!      ystat1(j,2) CONTAINS THE VARIANCE OF THE ARRAY SECTION y(j,:p).
!
!      xycor1(i,j) CONTAINS THE CORRELATION COEFFICIENT
!                  BETWEEN x(i,:p) AND y(j,:p).
!
!      xyn         CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAYS
!                  x(:n,:p) AND y(:m,:p) (E.G., xyn=real(p,stnd) ).
!
!   SECOND COMPUTE THE CORRELATION MATRIX BETWEEN x AND y,
!   ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cor( x(:n,i:i), y(:m,i:i), first, last,               &
                       xstat2(:n,:2), ystat2(:m,:2), xycor2(:n,:m), xyn )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_xstat = maxval( abs( ( xstat1-xstat2)/xstat1 ) )
    err_ystat = maxval( abs( ( ystat1-ystat2)/ystat1 ) )
    err_cor   = maxval( abs( xycor1-xycor2 ) )
!
    if ( max(err_xstat, err_ystat, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_cor
! ===========================
!
end program ex2_comp_cor

ex2_comp_cor_miss.F90

program ex2_comp_cor_miss
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_COR_MISS
!   in module Mul_Stat_Procedures for computing univariate statistics and a correlation
!   matrix between two two-dimensional arrays (e.g., two sets of variables), both with missing values.
!
!   All the statistics and correlations are computed with only one-pass on the data
!   in one or several steps with a very efficient algorithm for large datasets, which
!   also allows out-of-core computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor_miss
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=50
!
! miss IS THE MISSING INDICATOR.
!
    real(stnd), parameter :: miss=-999.99_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of comp_cor_miss'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                    :: err_xstat, err_ystat, err_cor, eps
    real(stnd), dimension(n,m,4)  :: xycor1, xycor2
    real(stnd), dimension(m,4)    :: ystat1, ystat2
    real(stnd), dimension(n,4)    :: xstat1, xstat2
    real(stnd), dimension(n,p)    :: x
    real(stnd), dimension(m,p)    :: y
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM TWO-DIMENSIONAL ARRAY x WITH MISSING VALUES.
!
    call random_number( x(:n,:p) )
    where ( x(:n,:p)<=0.05_stnd ) x(:n,:p) = miss
!
!   GENERATE A RANDOM TWO-DIMENSIONAL ARRAY y WITH MISSING VALUES.
!
    call random_number( y(:m,:p) )
    where ( y(:m,:p)<=0.05_stnd ) y(:m,:p) = miss
!
!   FIRST COMPUTE THE CORRELATION MATRIX BETWEEN x AND y
!   FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cor_miss( x(:n,:p), y(:m,:p), first, last, xstat1(:n,:4), &
                        ystat1(:m,:4), xycor1(:n,:m,:4), xymiss=miss )
!
!   ON EXIT OF comp_cor_miss WHEN last=true :
!
!      xstat1(i,1) CONTAINS THE MEAN VALUES OF THE ARRAY SECTION x(i,:p).
!
!      xstat1(i,2) CONTAINS THE VARIANCES OF THE ARRAY SECTION x(i,:p).
!
!      xstat1(i,3) CONTAINS THE NUMBER OF NON-MISSING OBSERVATIONS
!                  IN THE ARRAY SECTION x(i,:p).
!
!      ystat1(j,1) CONTAINS THE MEAN VALUE OF THE ARRAY SECTION y(j,:p).
!
!      ystat1(j,2) CONTAINS THE VARIANCE OF THE ARRAY SECTION y(j,:p).
!
!      ystat1(j,3) CONTAINS THE NUMBER OF NON-MISSING OBSERVATIONS
!                  IN THE ARRAY SECTION y(j,:p).
!
!      xycor1(i,j,1) CONTAINS THE CORRELATION COEFFICIENT BETWEEN x(i,:p) AND y(j,:p)
!                    COMPUTED WITH THE ABOVE UNIVARIATE STATISTICS.
!
!      xycor1(i,j,2) CONTAINS THE INCIDENCE VALUE BETWEEN x(i,:p) AND y(j,:p).
!                    xycor1(i,j,2) INDICATES THE NUMBER OF VALID PAIRS OF OBSERVATIONS
!                    WHICH WHERE USED IN THE CALCULATION OF xycor1(i,j,1) .
!
!   xstat1(:,4), ystat1(:,4) AND xycor1(:,:,3:4) ARE USED AS WORKSPACE AND CONTAIN NO USEFUL
!   INFORMATION ON OUTPUT OF comp_cor_miss.
!
!   SECOND COMPUTE THE CORRELATION MATRIX BETWEEN x AND y,
!   ITERATIVELY  FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_cor_miss( x(:n,i:i), y(:m,i:i), first, last, xstat2(:n,:4), &
                            ystat2(:m,:4), xycor2(:n,:m,:4), xymiss=miss )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_xstat = maxval( abs( ( xstat1(:n,:3)-xstat2(:n,:3))/xstat1(:n,:3) ) )
    err_ystat = maxval( abs( ( ystat1(:m,:3)-ystat2(:m,:3))/ystat1(:m,:3) ) )
    err_cor   = maxval( abs( xycor1(:n,:m,:2)-xycor2(:n,:m,:2) ) )
!
    if ( max(err_xstat, err_ystat, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_cor_miss
! ================================
!
end program ex2_comp_cor_miss

ex2_comp_cormat.F90

program ex2_comp_cormat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_CORMAT
!   in module Mul_Stat_Procedures for computing a correlation matrix from
!   a dataset and storing it in a linear array in packed format.
!
!   The correlation matrix is computed with only one-pass on the dataset
!   in one or several steps with a very efficient algorithm for large datasets,
!   which also allows out-of-core computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, comp_cormat
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=500, n=(m*(m+1))/2
!
    character(len=*), parameter :: name_proc='Example 2 of comp_cormat'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                       :: err_mean, err_std, err_cor, eps, xn
    real(stnd), dimension(n)         :: corp1, corp2
    real(stnd), dimension(m,p)       :: x
    real(stnd), dimension(m)         :: mean1, mean2, std1, std2
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last, cov
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x.
!
    call random_number( x )
!
!   IF OPTIONAL ARGUMENT cov IS SET TO TRUE, comp_cormat WILL COMPUTE A
!   VARIANCES-COVARIANCES MATRIX INSTEAD OF A CORRELATION MATRIX ON EXIT.
!   BY DEFAULT, A CORRELATION MATRIX IS COMPUTED.
!
    cov = false
!
!   FIRST COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cormat( x(:m,:p), first, last, mean1(:m), corp1(:n), xn,    &
                      xstd=std1(:m), cov=cov )
!
!   ON EXIT, WHEN last=true :
!
!     mean1(:m) CONTAINS THE VARIABLE MEANS COMPUTED FROM ALL OBSERVATIONS 
!     IN THE DATA MATRIX x.
!
!     THE UPPER TRIANGLE OF THE SYMMETRIC CORRELATION OR VARIANCE-COVARIANCE MATRIX cor,
!     AS CONTROLLED BY THE cov ARGUMENT, IS PACKED COLUMNWISE IN THE LINEAR ARRAY corp1.
!     MORE PRECISELY, THE J-TH COLUMN  OF cor IS STORED IN THE ARRAY CORP1 AS FOLLOWS:
!
!            corp1(i + (j-1)*j/2) = cor(i,j) for 1<=i<=j;
!
!     xn INDICATES THE NUMBERS OF OBSERVATIONS WHICH WERE 
!     USED IN THE CALCULATION OF corp1.
!
!     IF THE OPTIONAL ARGUMENT xstd IS PRESENT, xstd CONTAINS THE VARIABLE 
!     STANDARD-DEVIATIONS.
!
!   SECOND COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x, ITERATIVELY  FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last =  i==p
!
        call comp_cormat( x(:m,i:i), first, last, mean2(:m), corp2(:n), xn,    &
                         xstd=std2(:m), cov=cov )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_mean = maxval( abs( ( mean1-mean2)/mean1 ) )
    err_std  = maxval( abs( ( std1-std2)/std1    ) )
    err_cor  = maxval( abs( corp1-corp2 ) )
!
    if ( max(err_mean, err_std, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_cormat
! ==============================
!
end program ex2_comp_cormat

ex2_comp_cormat_miss.F90

program ex2_comp_cormat_miss
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_CORMAT_MISS
!   in module Mul_Stat_Procedures for computing a correlation matrix from
!   a dataset with missing values and storing it in a linear array in packed format.
!
!   The correlation matrix is computed with only one-pass on the dataset
!   in one or several steps with a very efficient algorithm for large datasets,
!   which also allows out-of-core computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, false, comp_cormat_miss
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT; m AND p ARE THE DIMENSIONS OF THE DATASET.
! m AND p ARE, RESPECTIVELY, THE NUMBER OF VARIABLES AND OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, m=20, p=500, n=(m*(m+1))/2
!
! miss IS THE MISSING INDICATOR.
!
    real(stnd), parameter :: miss=-999.99_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of comp_cormat_miss'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                       :: err_mean, err_std, err_cor, eps
    real(stnd), dimension(n)         :: corp1, corp2
    real(stnd), dimension(m,p)       :: x
    real(stnd), dimension(n,3)       :: xn
    real(stnd), dimension(m,2)       :: mean1, mean2
    real(stnd), dimension(m)         :: std1, std2
!
    integer(i4b) :: i
!
    logical(lgl) :: first, last, cov
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x WITH MISSING VALUES.
!
    call random_number( x(:m,:p) )
    where ( x(:m,:p)<=0.05_stnd ) x(:m,:p) = miss
!
!   IF OPTIONAL ARGUMENT cov IS SET TO TRUE, comp_cormat_miss WILL COMPUTE A
!   VARIANCES-COVARIANCES MATRIX INSTEAD OF A CORRELATION MATRIX ON EXIT.
!   BY DEFAULT, A CORRELATION MATRIX IS COMPUTED.
!
    cov = false
!
!   FIRST COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_cormat_miss( x(:m,:p), first, last, mean1(:m,:2), corp1(:n), xn(:n,:3), miss, &
                           xstd=std1(:m), cov=cov )
!
!   ON EXIT, WHEN last=true :
!
!     mean1(:m,1) CONTAINS THE VARIABLE MEANS COMPUTED FROM ALL NON-MISSING OBSERVATIONS 
!     IN THE DATA MATRIX x. mean1(:m,2) IS USED AS WORKSPACE.
!
!     THE UPPER TRIANGLE OF THE SYMMETRIC CORRELATION OR VARIANCE-COVARIANCE MATRIX cor,
!     AS CONTROLLED BY THE cov ARGUMENT, IS PACKED COLUMNWISE IN THE LINEAR ARRAY corp1.
!     MORE PRECISELY, THE J-TH COLUMN  OF cor IS STORED IN THE ARRAY CORP1 AS FOLLOWS:
!
!            corp1(i + (j-1)*j/2) = cor(i,j) for 1<=i<=j;
!
!     xn(:n,1) CONTAINS THE UPPER TRIANGLE OF THE MATRIX OF THE INCIDENCE VALUES
!     BETWEEN EACH PAIR OF VARIABLES, PACKED COLUMNWISE, IN A LINEAR ARRAY. 
!     xn(i + (j-1)*j/2,1) INDICATES THE NUMBERS OF NON-MISSING PAIRS WHICH WERE 
!     USED IN THE CALCULATION OF cor(i,j) for 1<=i<=j . xn(:n,2:3) IS USED AS WORKSPACE.
!     
!     IF THE OPTIONAL ARGUMENT xstd IS PRESENT, xstd CONTAINS THE VARIABLE 
!     STANDARD-DEVIATIONS COMPUTED FROM ALL NON-MISSING OBSERVATIONS.
!
!
!   SECOND COMPUTE THE MEANS, STANDARD-DEVIATIONS AND CORRELATION MATRIX
!   OF x, ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last =  i==p
!
        call comp_cormat_miss( x(:m,i:i), first, last, mean2(:m,:2), corp2(:n), xn(:n,:3), miss,   &
                               xstd=std2(:m), cov=cov )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_mean = maxval( abs( ( mean1(:m,1)-mean2(:m,1))/mean1(:m,1) ) )
    err_std  = maxval( abs( ( std1(:m)-std2(:m))/std1(:m)    ) )
    err_cor  = maxval( abs( corp1(:n)-corp2(:n) ) )
!
    if ( max(err_mean, err_std, err_cor )<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_cormat_miss
! ===================================
!
end program ex2_comp_cormat_miss

ex2_comp_inv.F90

program ex2_comp_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_INV
!   in module Lin_Procedures for computing the inverse of a real matrix
!   without destroying the input real matrix.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, comp_inv,  &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT AND n IS THE DIMENSION OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000
!
    character(len=*), parameter :: name_proc='Example 2 of comp_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, ainv, res
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : COMPUTE THE INVERSE OF A REAL n-BY-n MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), ainv(n,n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
! 
!   GENERATE A RANDOM REAL MATRIX.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE MATRIX INVERSE WITH SUBROUTINE comp_inv.
!   INPUT ARGUMENT NOT OVERWRITTEN.
!
    call comp_inv( a, failure, ainv )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM comp_inv SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of COMP_INV subroutine, failure=', failure
        write (prtunit,*) 
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res = matmul( a2, ainv )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a2(:n,:n)) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, ainv )
!
    if ( allocated( a2 ) ) deallocate( a2 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the inverse of a real matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_comp_inv
! ===========================
!
end program ex2_comp_inv

ex2_comp_mvs.F90

program ex2_comp_mvs
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_MVS
!   in module Stat_Procedures for computing univariate statistics (mean, variance,
!   and standard-deviation only) from a dataset with missing values.
!   The dataset can have up to four dimensions and the last one corresponds to
!   the observations.
!
!   All the statistics are computed with only one-pass on the data in one or several steps 
!   with a very efficient algorithm for large datasets, which also allows out-of-core
!   computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_mvs
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! xmiss IS THE MISSING INDICATOR.
!
    real(stnd), parameter  :: xmiss=-999.99_stnd
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAY.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=250
!
    character(len=*), parameter :: name_proc='Example 2 of comp_mvs'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                      :: err_mean, err_std, err_var, eps
    real(stnd), dimension(n,m)      :: xmean1, xmean2, xstd1, xstd2, xvar1, xvar2
    real(stnd), dimension(n,m,p)    :: x
!
    integer(i4b)                  :: i
    integer(i4b), dimension(n,m)  :: xnobs1, xnobs2
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(err_mean) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x WITH MISSING VALUES.
!
    call random_number( x )
!
    where( x(:,:,:)<=0.25_stnd )  x(:,:,:) = xmiss
!
!   FIRST COMPUTE THE MEANS, VARIANCES, STANDARD-DEVIATIONS AND NUMBER OF OBSERVATIONS 
!   OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_mvs( x(:,:,:), first, last, xmean1(:,:), xvar1(:,:), xstd1(:,:),  &
                   xmiss=xmiss, xnobs=xnobs1(:,:) )
!
!   SECOND RECOMPUTE THE MEANS, VARIANCES, STANDARD-DEVIATIONS AND NUMBER OF OBSERVATIONS OF x
!   ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_mvs( x(:,:,i:i), first, last, xmean2(:,:), xvar2(:,:), xstd2(:,:),  &
                       xmiss=xmiss, xnobs=xnobs2(:,:)  )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err_mean = maxval( abs( ( xmean1-xmean2)/xmean1 ) )
    err_var  = maxval( abs( ( xvar1-xvar2)/xvar1    ) )
    err_std  = maxval( abs( ( xstd1-xstd2)/xstd1    ) )
!
    if ( max(err_mean, err_var, err_std )<=eps .and. all( xnobs2(:,:)==xnobs1(:,:) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_mvs
! ===========================
!
end program ex2_comp_mvs

ex2_comp_triang_inv.F90

program ex2_comp_triang_inv
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_TRIANG_INV
!   in module Lin_Procedures for computing the inverse of a real triangular matrix
!   in place.                                                                      
!                                                                              
!                                                                              
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, one, true, false, allocate_error,   &
                         triangle, norm, comp_triang_inv, merror
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE TRIANGULAR MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=4000, p=n*(n+1)/2
!
    character(len=*), parameter :: name_proc='Example 2 of comp_triang_inv'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, res
    real(stnd), dimension(:),   allocatable :: ap
!
    integer(i4b) :: j
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : COMPUTE IN PLACE THE INVERSE OF A REAL n-BY-n TRIANGULAR MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE MATRIX IS UPPER OR LOWER TRIANGULAR.
!
    upper = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), ap(p), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM TRIANGULAR MATRIX IN PACKED FORM ap .
!
    call random_number( ap )
!
!   MAKE SURE THAT TRIANGULAR MATRIX IS NOT SINGULAR.
!
    ap = ap + real( n, stnd )
!
!   UNPACK THE TRIANGULAR MATRIX a .
!
    a = unpack( ap, mask=triangle(upper,n,n,extra=1_i4b), field=zero )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), res(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE TRIANGULAR MATRIX.
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE IN PLACE THE INVERSE OF a WITH SUBROUTINE comp_triang_inv.
!   THE INPUT ARGUMENT IS OVERWRITTEN.
!
    call comp_triang_inv( a, upper=upper )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       COMPUTE a TIMES ITS INVERSE - IDENTITY.
!
        res(:n,:n) = matmul( a(:n,:n), a2(:n,:n) )
!
        do j = 1_i4b, n
            res(j,j) = res(j,j) - one
        end do
!
        err = norm( res(:n,:n) ) /( real(n,stnd)*norm(a2(:n,:n)) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, ap, a2, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, ap )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing in place the inverse of a real triangular matrix of size ', &
       n, ' by ', n, ' is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_comp_triang_inv
! ==================================
!
end program ex2_comp_triang_inv

ex2_comp_unistat.F90

program ex2_comp_unistat
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine COMP_UNISTAT
!   in module Stat_Procedures for computing univariate statistics from a dataset
!   with missing values.
!   The dataset can have up to four dimensions and the last one corresponds to
!   the observations.
!
!   All the statistics are computed with only one-pass on the data in one or several steps 
!   with a very efficient algorithm for large datasets, which also allows out-of-core
!   computations.
!
!
! LATEST REVISION : 14/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_unistat
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! xmiss IS THE MISSING INDICATOR.
!
    real(stnd), parameter  :: xmiss=-999.99_stnd
!
! prtunit IS THE PRINTING UNIT; n, m AND p ARE THE DIMENSIONS OF THE ARRAYS.
! p IS THE NUMBER OF OBSERVATIONS.
!
    integer(i4b), parameter :: prtunit=6, n=26, m=20, p=250
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                      :: err1, err2, eps
    real(stnd), dimension(n,m,7)    :: xstat1, xstat2
    real(stnd), dimension(n,m,p)    :: x
!
    integer(i4b)                  :: i
    integer(i4b), dimension(n,m)  :: xnobs1, xnobs2
!
    logical(lgl) :: first, last
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 2 of comp_unistat'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon(eps) )
!
!   GENERATE A RANDOM OBSERVATION ARRAY x WITH MISSING VALUES, xmiss IS THE MISSING INDICATOR.
!
    call random_number( x )
!
    where( x(:,:,:)<=0.25_stnd )  x(:,:,:) = xmiss
!
!   FIRST COMPUTE THE STATISTICS OF x FOR THE p OBSERVATIONS IN ONE STEP.
!
    first = true
    last  = true
!
    call comp_unistat( x(:n,:m,:p), first, last, xstat1(:n,:m,:7), xmiss=xmiss, xnobs=xnobs1(:n,:m) )
!
!   ON EXIT, WHEN last=true, xstat1 CONTAINS THE FOLLOWING
!   STATISTICS ON ALL VARIABLES :
!
!        xstat1(:,:,1) CONTAINS THE MEAN VALUES.
!        xstat1(:,:,2) CONTAINS THE VARIANCES.
!        xstat1(:,:,3) CONTAINS THE STANDARD DEVIATIONS.
!        xstat1(:,:,4) CONTAINS THE COEFFICIENTS OF SKEWNESS.
!        xstat1(:,:,5) CONTAINS THE COEFFICIENTS OF KURTOSIS.
!        xstat1(:,:,6) CONTAINS THE MINIMA.
!        xstat1(:,:,7) CONTAINS THE MAXIMA.
!
!   ON EXIT,  xnobs(:,:) CONTAINS THE NUMBERS OF NON-MISSING OBSERVATIONS
!   ON ALL VARIABLES. xnobs NEEDS TO BE SPECIFIED ONLY ON THE LAST 
!   CALL TO comp_unistat (LAST=true).
!
!   SECOND RECOMPUTE THE STATISTICS OF x, ITERATIVELY FOR THE p OBSERVATIONS.
!
    do i = 1, p
!
        first = i==1
        last  = i==p
!
        call comp_unistat( x(:n,:m,i:i), first, last, xstat2(:n,:m,:7), xmiss=xmiss, xnobs=xnobs2(:n,:m) )
!
    end do
!
!   CHECK THAT THE TWO SETS OF STATISTICS AGREE.
!
    err1 = maxval( abs( (xstat2(:,:,1:3)-xstat1(:,:,1:3))/xstat1(:,:,1:3) ) )
    err2 = maxval( abs( xstat2(:,:,4:7)-xstat1(:,:,4:7) ) )
!
    if ( max(err1, err2)<=eps  .and. all( xnobs2(:,:)==xnobs1(:,:) ) ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_comp_unistat
! ===============================
!
end program ex2_comp_unistat

ex2_drawsample.F90

program ex2_drawsample
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and DRAWSAMPLE
!   in modules Mul_Stat_Procedures and Random, respectively, for computing a permutation
!   test of a correlation coefficient.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use statpack, only : i4b, stnd, lgl, true, comp_cor, drawsample, random_seed_, random_number_
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! p       IS THE NUMBER OF OBSERVATIONS OF THE RANDOM VECTORS;
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PERMUTATION TEST.
!
    integer(i4b), parameter :: prtunit=6, p=47, p1=37, p2=p, p3=p2-p1+1, nrep=9999, nsample=5
!
    character(len=*), parameter :: name_proc='Example 2 of drawsample'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                        :: xyn
    real(stnd), dimension(nsample)    :: xycor, prob, xycor2
    real(stnd), dimension(nsample,2)  :: xstat
    real(stnd), dimension(2)          :: ystat
    real(stnd), dimension(nsample,p)  :: x
    real(stnd), dimension(nsample,p3) :: x2
    real(stnd), dimension(p)          :: y
    real(stnd), dimension(p3)         :: y2
    real(stnd), dimension(6,p)         :: dat
!
    integer(i4b)                     :: i
    integer(i4b), dimension(p)       :: pop
    integer(i4b), dimension(nsample) :: nge
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE DATA MATRIX.
!
    dat = reshape( (/  &
24.54,18.62,1.69,3.69,5.92,1915.7,&
24.22,18.38,1.54,3.58,5.83,2088.1,&
24.40,18.13,2.26,4.17,6.28,1910.1,&
24.33,17.99,2.28,4.21,6.34,1925.6,&
24.47,18.28,2.23,4.11,6.18,2040.1,&
24.07,18.47,1.13,3.25,5.60,2314.8,&
24.51,18.80,1.23,3.36,5.71,2120.3,&
24.49,18.43,1.95,3.90,6.06,1819.4,&
24.55,18.53,1.69,3.75,6.02,1889.5,&
24.74,18.52,2.15,4.08,6.22,1692.1,&
24.99,18.33,2.36,4.40,6.65,1845.3,&
24.85,17.69,2.78,4.86,7.16,2060.8,&
24.42,17.83,2.12,4.24,6.59,2010.8,&
24.58,18.55,1.66,3.74,6.03,2166.1,&
24.53,18.50,1.63,3.72,6.03,2113.6,&
24.55,18.19,2.41,4.28,6.36,1482.6,&
24.82,18.52,2.01,4.04,6.29,1539.1,&
24.37,18.47,1.83,3.76,5.90,1830.7,&
24.65,17.91,2.15,4.33,6.74,1664.7,&
24.66,18.22,2.06,4.14,6.45,2368.1,&
25.35,18.88,2.10,4.18,6.48,2542.3,&
25.02,18.58,2.30,4.26,6.44,2263.3,&
24.67,18.31,2.23,4.19,6.36,2250.4,&
24.24,18.26,1.82,3.79,5.98,1929.3,&
24.50,18.82,1.51,3.49,5.68,2501.0,&
24.41,18.84,1.58,3.47,5.57,2158.7,&
24.64,18.84,2.06,3.83,5.80,2229.8,&
24.65,19.16,1.59,3.44,5.48,1881.2,&
24.51,18.84,1.95,3.72,5.67,1981.1,&
24.64,18.90,2.12,3.84,5.74,2862.6,&
24.63,18.42,1.69,3.84,6.21,2526.3,&
25.22,18.78,2.11,4.16,6.43,2057.5,&
25.08,18.17,2.36,4.52,6.91,2464.4,&
25.02,18.63,1.93,4.05,6.39,2444.2,&
24.94,18.78,1.82,3.88,6.15,1965.9,&
25.31,18.35,2.41,4.57,6.97,1991.9,&
25.08,18.45,2.39,4.40,6.63,2205.1,&
24.79,18.54,1.97,4.00,6.26,2080.0,&
24.88,18.80,1.99,3.93,6.08,2331.2,&
24.31,19.10,1.28,3.14,5.21,2677.1,&
24.59,18.64,1.71,3.72,5.94,2415.5,&
24.97,18.64,1.96,4.03,6.32,1998.2,&
25.10,18.41,1.93,4.19,6.68,1925.8,&
25.14,18.66,2.21,4.24,6.48,2128.9,&
24.39,18.96,1.62,3.43,5.43,1977.4,&
25.36,18.30,2.19,4.50,7.06,1831.8,&
24.99,18.68,2.13,4.12,6.32,1967.0 &
                      /) ,shape=(/ 6, p /) )
!

!
    y(:p) = dat(6,:)
!
    x(:,:p) = dat(1:5,:)
!
!   COMPUTE THE CORRELATIONS BETWEEN x AND y
!   FOR THE p2-p1+1 LAST  OBSERVATIONS.
!
    first = true
    last  = true
!
    call comp_cor( x(:nsample,p1:p2), y(p1:p2), first, last, xstat(:nsample,:2), ystat(:2),    &
                   xycor(:nsample), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(:nsample,1)     CONTAINS THE MEAN VALUES OF THE OBSERVATION ARRAY x(:nsample,p1:p2).
!
!      xstat(:nsample,2)     CONTAINS THE VARIANCES OF THE OBSERVATION ARRAY x(:nsample,p1:p2).
!
!      ystat(1)              CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(p1:p2).
!
!      ystat(2)              CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(p1:p2).
!
!      xycor(:nsample)       CONTAINS THE CORRELATION COEFFICIENTS BETWEEN x(:nsample,p1:p2) AND y(p1:p2).
!
!      xyn                   CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAYS
!                            x(:nsample,p1:p2) AND  y(p1:p2) (xyn=real(p2-p1+1,stnd) ).
!
!
!   NOW COMPUTE A PERMUTATION TEST OF THE CORRELATION BETWEEN x AND y WITH
!   SUBROUTINES drawsample  AND comp_cor WITH nrep SHUFFLES.
!
    nge(:nsample) = 1
    call random_seed_( )
!
    do i=1, nrep
!
        call drawsample( p3, pop )
!
        x2(:nsample,:p3) = x(:nsample,pop(:p3))
        y2(:p3)          = y(pop(:p3))
!
        call comp_cor( x2(:nsample,:p3), y2(:p3), first, last, xstat(:nsample,:2), ystat(:2),    &
                       xycor2(:nsample), xyn )
!
        where( abs( xycor2(:nsample) )>= abs( xycor(:nsample) ) ) nge(:nsample) = nge(:nsample) + 1
!
    end do 
!
!   COMPUTE THE SIGNIFICANCE LEVELS.
!
    prob(:nsample) = real( nge(:nsample), stnd )/real( nrep+1, stnd )
!
    write (prtunit,*) 'Correlations  = ', xycor(:nsample)
    write (prtunit,*) 'Probabilities = ', prob(:nsample)
!
!
! END OF PROGRAM ex2_drawsample
! =============================
!
end program ex2_drawsample

ex2_eig_cmp.F90

program ex2_eig_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix when only the lower (or upper) triangle of this matrix is stored.
!
!   However, in this case, the initial tridiagonal reduction is not blocked or parallelized
!   in the present version of STATPACK. The second step, the tridiagonal QR algorithm, is
!   still blocked and parallelized (if OpenMP is used).
!
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, merror,   &
                         allocate_error, unit_matrix, eig_cmp
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eig_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer        :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, failure, upper
!   
    character    :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE TRIDIAGONAL QR METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a ,
!   BY USING ONLY THE LOWER TRIANGLE OF a ON INPUT INSTEAD OF THE UPPER
!   TRIANGLE.
!
    call eig_cmp( a, d, failure, sort=sort, upper=upper )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a .
!
!   ON EXIT OF eig_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE TRIDIAGONAL QR ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a .
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,1,n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eig_cmp
! ==========================
!
end program ex2_eig_cmp

ex2_eig_cmp2.F90

program ex2_eig_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP2
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix when only the lower (or upper) triangle of this matrix is stored.
!
!   However, in this case, the initial tridiagonal reduction is not blocked or parallelized
!   in the present version of STATPACK. The second step, the tridiagonal QR algorithm, is
!   still blocked and parallelized (if OpenMP is used). Furthermore, a perfect shift strategy
!   and a wave-front algorithm for applying Givens rotations to eigenvectors are used in the
!   tridiagonal QR algorithm. With these changes, EIG_CMP2 is usually much faster than
!   subroutine EIG_CMP for computing an EVD of a real symmetric matrix for large matrices.
!                                                                            
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, merror,   &
                         allocate_error, unit_matrix, eig_cmp2
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eig_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer        :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, failure, upper
!   
    character    :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE QR METHOD, A PERFECT SHIFT
!               STRATEGY FOR THE EIGENVECTORS AND A WAVE-FRONT ALGORITHM
!               FOR APPLYING GIVENS ROTATIONS IN THE TRIDIAGONAL QR
!               ALGORITHM.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp2.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a .
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a ,
!   BY USING ONLY THE LOWER TRIANGLE OF a ON INPUT INSTEAD OF THE UPPER
!   TRIANGLE.
!
    call eig_cmp2( a, d, failure, sort=sort, upper=upper )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a .
!
!   ON EXIT OF eig_cmp2:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,1,n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eig_cmp2
! ===========================
!
end program ex2_eig_cmp2

ex2_eig_cmp3.F90

program ex2_eig_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIG_CMP3
!   in module Eig_Procedures for computing the full EigenValue Decomposition (EVD)
!   of a real symmetric matrix when only the lower (or upper) triangle of this matrix is stored.
!
!   However, in this case, the initial tridiagonal reduction is not blocked or parallelized
!   in the present version of STATPACK. The second step, the tridiagonal QR algorithm, is
!   still blocked and parallelized (if OpenMP is used). Furthermore, a wave-front
!   algorithm for applying Givens rotations to eigenvectors is used in the
!   tridiagonal QR algorithm. With this change, EIG_CMP3 is usually much faster than
!   subroutine EIG_CMP for computing an EVD of a real symmetric matrix for large matrices.
!                                                                            
!
! LATEST REVISION : 04/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, merror,   &
                         allocate_error, unit_matrix, eig_cmp3
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eig_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, resid2
    real(stnd), dimension(:,:), allocatable :: a, a2, resid
!
    integer        :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test, failure, upper
!   
    character    :: sort='a'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC
!               MATRIX USING THE QR METHOD AND A WAVE-FRONT ALGORITHM
!               FOR APPLYING GIVENS ROTATIONS IN THE TRIDIAGONAL QR
!               ALGORITHM.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
        allocate( a2(n,n), resid(n,n), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!   WITH SUBROUTINE eig_cmp3.
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   COMPUTE EIGENVALUES AND EIGENVECTORS OF THE SELF-ADJOINT MATRIX a ,
!   BY USING ONLY THE LOWER TRIANGLE OF a ON INPUT INSTEAD OF THE UPPER
!   TRIANGLE.
!
    call eig_cmp3( a, d, failure, sort=sort, upper=upper )
!
!   THE ROUTINE RETURNS THE EIGENVALUES AND THE EIGENVECTORS OF a.
!
!   ON EXIT OF eig_cmp3:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       a IS OVERWRITTEN WITH THE EIGENVECTORS, STORED COLUMNWISE;
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!   THE EIGENVECTORS ARE REARRANGED ACCORDINGLY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul(a2(:n,:n),a(:n,:n)) - a(:n,:n)*spread(d,1,n)
        resid2(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( resid2(:n) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a2(:n,:n) )
!
        resid(:n,:n) = abs( a2(:n,:n) - matmul( transpose(a(:n,:n)), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, d, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, d )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eig_cmp2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', n, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eig_cmp3
! ===========================
!
end program ex2_eig_cmp3

ex2_eigval_cmp.F90

program ex2_eigval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix
!   stored in packed form.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the fast
!   Pal-Walker-Kahan variant of the QR method with implicit shift is used for
!   computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix in packed form.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, triangle, trid_inviter,  &
                         eigval_cmp, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=(n*(n+1))/2, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eigval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: vec, d, res
    real(stnd), dimension(:,:), allocatable :: a, eigvec, d_e, resmat
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, upper
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX STORED IN PACKED FORM USING THE FAST PAL-WALKER-KAHAN VARIANT
!               OF THE QR METHOD FOR EIGENVALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED IN PACKED FORM.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), vec(p), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
!   MAKE A COPY OF THE SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp( vec, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       vec IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!           THE OPTIONAL ARGUMENT d_e IS SPECIFIED. THE MATRIX Q IS STORED IN FACTORED FORM.
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!       STORED IN PACKED FORM BY maxiter INVERSE ITERATIONS ON THE INTERMEDIATE
!       TRIDIAGONAL MATRIX d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           matp=vec, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resmat(n,neig), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        resmat(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( resmat(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        resmat(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( resmat(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resmat, res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d_e, d, vec )
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all the eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix stored in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eigval_cmp
! =============================
!
end program ex2_eigval_cmp

ex2_eigval_cmp2.F90

program ex2_eigval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP2
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix
!   stored in packed form.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the fast
!   Pal-Walker-Kahan variant of the QR method with implicit shift is used for
!   computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix in packed form.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, triangle, trid_inviter,  &
                         eigval_cmp2, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=(n*(n+1))/2, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eigval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: vec, d, res
    real(stnd), dimension(:,:), allocatable :: a, eigvec, d_e, resmat
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, upper
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX STORED IN PACKED FORM USING THE FAST PAL-WALKER-KAHAN VARIANT
!               OF THE QR METHOD FOR EIGENVALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED IN PACKED FORM.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), vec(p), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
!   MAKE A COPY OF THE SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp2( vec, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp2:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       vec IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!           THE OPTIONAL ARGUMENT d_e IS SPECIFIED. THE MATRIX Q IS STORED IN FACTORED FORM.
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!       STORED IN PACKED FORM BY maxiter INVERSE ITERATIONS ON THE INTERMEDIATE
!       TRIDIAGONAL MATRIX d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           matp=vec, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resmat(n,neig), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        resmat(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( resmat(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        resmat(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( resmat(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resmat, res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d_e, d, vec )
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp2()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all the eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix stored in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eigval_cmp2
! ==============================
!
end program ex2_eigval_cmp2

ex2_eigval_cmp3.F90

program ex2_eigval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine EIGVAL_CMP3
!   in module Eig_Procedures for computing all eigenvalues of a real symmetric matrix
!   stored in packed form.
!
!   The initial tridiagonal reduction is parallelized if OpenMP is used and the QR
!   method with implicit shift is used for computing the eigenvalues.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module EIG_Procedures
!    for computing eigenvectors of a real symmetric matrix in packed form.
!
!
! LATEST REVISION : 04/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, norm, triangle, trid_inviter,  &
                         eigval_cmp3, merror, allocate_error, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=(n*(n+1))/2, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of eigval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: vec, d, res
    real(stnd), dimension(:,:), allocatable :: a, eigvec, d_e, resmat
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, upper
!   
    character :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX STORED IN PACKED FORM USING THE QR METHOD
!               FOR EIGENVALUES AND THE INVERSE ITERATION TECHNIQUE FOR
!               SELECTED EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER TRIANGLE OF THE SYMMETRIC
!   MATRIX IS STORED IN PACKED FORM.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), d(n), d_e(n,2), vec(p), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
!   MAKE A COPY OF THE SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    call eigval_cmp3( vec, d, failure, sort=sort, d_e=d_e )
!
!   THE ROUTINE RETURNS THE EIGENVALUES OF a.
!
!   ON EXIT OF eigval_cmp3:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a.
!
!       vec IS OVERWRITTEN BY THE ORTHOGONAL MATRIX USED TO TRANSFORM a IN TRIDIAGONAL FORM IF
!           THE OPTIONAL ARGUMENT d_e IS SPECIFIED. THE MATRIX Q IS STORED IN FACTORED FORM.
!
!       d IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a
!       STORED IN PACKED FORM BY maxiter INVERSE ITERATIONS ON THE INTERMEDIATE
!       TRIDIAGONAL MATRIX d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1_i4b), d_e(:n,2_i4b), d(:neig), eigvec, failure2, &
                           matp=vec, maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resmat(n,neig), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        resmat(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( d(:neig), dim=1, ncopies=n)
        res(:neig)  = norm( resmat(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res(:neig) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        resmat(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( resmat(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( resmat, res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d_e, d, vec )
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp3()   ) = ', failure
!
    if ( .not. failure .and. neig>0  ) then
        write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing all the eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix stored in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_eigval_cmp3
! ==============================
!
end program ex2_eigval_cmp3

ex2_gchol_cmp.F90

program ex2_gchol_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines GCHOL_CMP and CHOL_SOLVE
!   in module Lin_Procedures for solving a linear system with a real symmetric semi-positive definite
!   coefficient matrix and several right hand sides with a Cholesky decomposition.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, c10, true, false, gchol_cmp, chol_solve, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIC SEMI-POSITIVE DEFINITE MATRIX.,
! nrhs IS THE NUMBER OF RIGHT HAND SIDES.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=n-1, nrhs=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter :: fudge=c10
!
    character(len=*), parameter :: name_proc='Example 2 of gchol_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tol, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, c, b, b2, d, res
    real(stnd), dimension(:), allocatable   :: invdiag
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate
!
    logical(lgl)   :: do_test, upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : REAL SYMMETRIC DEFINITE POSITIVE MATRIX AND SEVERAL RIGHT HAND-SIDES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    tol = sqrt( epsilon( err ) )
    eps = fudge*tol
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SPECIFY IF THE UPPER OR LOWER PART OF THE SYMMETRIC MATRIX IS STORED.
!
    upper = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( c(m,n), a(n,n), b(n,nrhs), invdiag(n), d(m,nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM SYMMETRIC POSITIVE SEMIDEFINITE MATRIX a .
!
    call random_number( c )
!
    a = matmul( transpose(c), c )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b BELONGING TO THE RANGE OF a.
!
    call random_number( d )
!
    b = matmul( transpose(c), d )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS .
!
        allocate(  a2(n,n), b2(n,nrhs), res(n,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF COEFFICIENT MATRIX AND RIGHT HAND-SIDE MATRIX .
!
        a2 = a
        b2 = b
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE SOLUTION MATRIX FOR SYMMETRIC POSITIVE SEMIDEFINITE
!   SYSTEM
!
!                     a*x = b .
!
!   BY COMPUTING THE CHOLESKY DECOMPOSITION OF MATRIX a .
!   IF ON OUTPUT OF gchol_cmp d1 IS GREATER OR EQUAL TO ZERO
!   THEN THE SYMMETRIC LINEAR SYSTEM CAN BE SOLVED BY 
!   SUBROUTINE chol_solve.
!
    call gchol_cmp( a, invdiag, krank, d1, tol=tol, upper=upper )
!
    if ( d1<zero ) then
!
!       ANORMAL EXIT FROM gchol_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to GCHOL_CMP subroutine, d1=', d1
!
    else
!
        call chol_solve( a, invdiag, b, upper=upper )
!
        if ( do_test ) then
!
!           CHECK THE RESULTS FOR SMALL RESIDUALS.
!
            res(:n,:nrhs) = b2(:n,:nrhs) - matmul( a2, b(:n,:nrhs) )
            err = maxval( sum( abs(res), dim=1 ) / ( sum(abs(a2)) + sum(abs(b2), dim=1) ) )
!
        end if
!
    end if
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
        deallocate( a, b, c, d, invdiag, a2, b2, res )
    else
        deallocate( a, b, c, d, invdiag )
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1>=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a real symmetric semi-positive definite system of size ', &
       n, ' with', nrhs, ' right hand side vectors is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_gchol_cmp
! ============================
!
end program ex2_gchol_cmp

ex2_hwfilter.F90

program ex2_hwfilter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine HWFILTER
!   in module Time_Series_Procedures for filtering a multi-channel time series
!   in a specific frequency band.
!                                                                              
!                                                                              
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, hwfilter
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES,
! m IS THE NUMBER OF TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=1000
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                 :: err, win
    real(stnd), dimension(m,n) :: y, y2, y3
!
    integer(i4b) :: minp, maxp
!   
!   
! PARAMETERS 
! ==========
!
    character(len=*), parameter :: name_proc='Example 2 of hwfilter'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE m RANDOM REAL NUMBER SEQUENCES OF LENGTH n .
!
    call random_number( y(:m,:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:m,:n) = y(:m,:n)
    y3(:m,:n) = y(:m,:n)
!
    minp  = 18_i4b
    maxp  = 96_i4b
!
!   BY DEFAULT, HAMMMING WINDOW FILTERING IS USED (i.e. win=0.54).
!   SET win=0.5 for HANNING WINDOW OR win=1 FOR RECTANGULAR WINDOW.
!   IN ANY CASE, win MUST BE GREATER OR EQUAL TO 0.5 AND LESS OR EQUAL TO 1.
!
    win = one
!
!   hwfilter FILTERS A NUMBER OF TIME SERIES (THE ARGUMENT MAT) IN THE FREQUENCY BAND
!   LIMITED BY PERIODS PL AND PH BY WINDOWED FILTERING (PL AND PH ARE EXPRESSED IN NUMBER OF
!   POINTS, i.e. PL=18 AND PH=96 SELECTS PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR MONTHLY DATA).
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS BETWEEN minp AND maxp .
!
    call hwfilter( mat=y2(:m,:n), pl=minp, ph=maxp, win=win, max_alloc=1000 )
!
!   SETTING PH<PL IS ALLOWED AND PERFORMS BAND REJECTION OF PERIODS BETWEEN PH AND PL.
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS OUTSIDE THE BAND BETWEEN minp AND maxp .
!
    call hwfilter( mat=y3(:m,:n), pl=maxp, ph=minp, win=win, max_alloc=1000 )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE FILTERED TIME SERIES.
!
    y2(:m,:n) = y2(:m,:n) + y3(:m,:n)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    err = maxval(abs(y(:m,:n)-y2(:m,:n)))/maxval(abs(y(:m,:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_hwfilter
! ===========================
!
end program ex2_hwfilter

ex2_hwfilter2.F90

program ex2_hwfilter2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine HWFILTER2
!   in module Time_Series_Procedures for filtering a multi-channel time series
!   in a specific frequency band.
!                                                                              
!                                                                              
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, one, hwfilter2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE LENGTH OF THE TIME SERIES,
! m IS THE NUMBER OF TIME SERIES.
!
    integer(i4b), parameter :: prtunit=6, n=500, m=1000
!
    character(len=*), parameter :: name_proc='Example 2 of hwfilter2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                 :: err, win
    real(stnd), dimension(m,n) :: y, y2, y3
!
    integer(i4b) :: minp, maxp
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE m RANDOM REAL NUMBER SEQUENCES OF LENGTH n .
!
    call random_number( y(:m,:n) )
!
!   SAVE THE REAL RANDOM NUMBER ARRAY.
!
    y2(:m,:n) = y(:m,:n)
    y3(:m,:n) = y(:m,:n)
!
    minp  = 18_i4b
    maxp  = 96_i4b
!
!   BY DEFAULT, HAMMMING WINDOW FILTERING IS USED (i.e. win=0.54).
!   SET win=0.5 for HANNING WINDOW OR win=1 FOR RECTANGULAR WINDOW.
!   IN ANY CASE, win MUST BE GREATER OR EQUAL TO 0.5 AND LESS OR EQUAL TO 1.
!
    win = one
!
!   hwfilter FILTERS A NUMBER OF TIME SERIES (THE ARGUMENT MAT) IN THE FREQUENCY BAND
!   LIMITED BY PERIODS PL AND PH BY WINDOWED FILTERING (PL AND PH ARE EXPRESSED IN NUMBER OF
!   POINTS, i.e. PL=18 AND PH=96 SELECTS PERIODS BETWEEN 1.5 YRS AND 8 YRS FOR MONTHLY DATA).
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS BETWEEN minp AND maxp .
!
    call hwfilter2( mat=y2(:m,:n), pl=minp, ph=maxp, win=win )
!
!   SETTING PH<PL IS ALLOWED AND PERFORMS BAND REJECTION OF PERIODS BETWEEN PH AND PL.
!
!   FILTER THE TIME SERIES, KEEP THE PERIODS OUTSIDE THE BAND BETWEEN minp AND maxp .
!
    call hwfilter2( mat=y3(:m,:n), pl=maxp, ph=minp, win=win )
!
!   NOW RECONSTRUCT THE ORIGINAL TIME SERIES FROM THE FILTERED TIME SERIES.
!
    y2(:m,:n) = y2(:m,:n) + y3(:m,:n)
!
!   TEST THE ACCURACY OF THE RECONSTRUCTION.
!
    err = maxval(abs(y(:m,:n)-y2(:m,:n)))/maxval(abs(y(:m,:n)))
!
    if ( err<=sqrt(epsilon(err))  ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_hwfilter2
! ============================
!
end program ex2_hwfilter2

ex2_lin_lu_solve.F90

program ex2_lin_lu_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LIN_LU_SOLVE
!   in module Lin_Procedures for solving a real linear system by a LU decomposition
!   with partial pivoting and implicit row scaling and several right hand sides.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, lin_lu_solve, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED
! AND nrhs IS THE NUMBER OF RIGHT HAND SIDES.
!
    integer(i4b), parameter :: prtunit=6, n=4000, nrhs=4000
!
    character(len=*), parameter :: name_proc='Example 2 of lin_lu_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, b, x, res
!
    integer        :: iok, istart, iend, irate
!
    logical(lgl)   :: failure, do_test
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : REAL MATRIX AND SEVERAL RIGHT HAND-SIDES.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n,nrhs), x(n,nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
!   GENERATE A n-by-nrhs RANDOM SOLUTION MATRIX x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-MATRIX PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE SOLUTION MATRIX FOR LINEAR SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a. IF ON OUTPUT OF lin_lu_solve
!   failure IS SET TO FALSE THEN THE LINEAR SYSTEM IS NOT SINGULAR
!   AND THE SOLUTION MATRIX HAS BEEN COMPUTED.
!
    call lin_lu_solve( a, b, failure )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( failure ) then
!
!       ANORMAL EXIT FROM lin_lu_solve SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in the call to LIN_LU_SOLVE subroutine, failure=', failure
!
    else if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n,:nrhs) = b(:n,:nrhs) - x(:n,:nrhs)
        err = maxval( sum( abs(res), dim=1 ) /    &
                      sum(abs(x),    dim=1 )      )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solutions of a linear real system of size ', &
       n, ' with', nrhs,' right hand sides is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_lin_lu_solve
! ===============================
!
end program ex2_lin_lu_solve

ex2_llsq_qr_solve.F90

program ex2_llsq_qr_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE
!   in module LLSQ_Procedures for solving linear least squares problems with a
!   QR decomposition (with column pivoting) or a complete orthogonal factorization
!   of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, &
                         merror, llsq_qr_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, nrhs=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of llsq_qr_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:,:) :: x, resid, b, a
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, min_norm
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-BY-n REAL COEFFICIENT
!               MATRIX AND MULTIPLE RIGHT HAND SIDES USING A QR DECOMPOSITION WITH COLUMN
!               PIVOTING OR A COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION MATRIX FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), resid(m,nrhs), x(n,nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM BY A QR DECOMPOSITION WITH COLUMN PIVOTING
!   USING SUBROUTINE llsq_qr_solve.
!
    call llsq_qr_solve( a(:m,:n), b(:m,:nrhs), x(:n,:nrhs), resid=resid(:m,:nrhs),  &
                        krank=krank, tol=tol, min_norm=min_norm   )
!
!   llsq_qr_solve COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES.
!
!   a AND b ARE NOT OVERWRITTEN BY llsq_qr_solve. THE OPTIONAL ARGUMENTS krank,
!   tol AND min_norm MUST NOT BE PRESENT IF a IS A m-ELEMENTS VECTOR.
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!   
!   BY DEFAULT, PIVOTING IS DONE ON ALL THE COLUMNS OF a.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED,
!   krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER resid IS PRESENT,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND ON EXIT
!
!                               resid = b - a*x .
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve .
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true IN THE CALL TO llsq_qr_solve. OTHERWISE, SOLUTION(S) ARE COMPUTED
!   SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        err = maxval( sum( abs( matmul( transpose(resid), a ) ), dim=2 ) )/ sum( abs(a) )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, resid, x )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_llsq_qr_solve
! ================================
!
end program ex2_llsq_qr_solve

ex2_llsq_qr_solve2.F90

program ex2_llsq_qr_solve2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE2
!   in module LLSQ_Procedures for solving linear least squares problems with a
!   QR decomposition (with column pivoting) or a complete orthogonal factorization
!   of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, &
                         merror, llsq_qr_solve2
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n ), nrhs=100
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of llsq_qr_solve2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:,:) :: a, a2, x, b
!
    integer(i4b)                            :: krank, j, l, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, min_norm, do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-BY-n REAL COEFFICIENT
!               MATRIX AND MULTIPLE RIGHT HAND SIDES USING A QR DECOMPOSITION WITH COLUMN
!               PIVOTING OR A COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION MATRIX FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs) .
!
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE RESIUDALS MUST BE COMPUTED.
!
    comp_resid = true
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), x(n,nrhs), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(min(m,n)-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM BY A QR DECOMPOSITION WITH COLUMN PIVOTING
!   USING SUBROUTINE llsq_qr_solve2.
!
    call llsq_qr_solve2( a(:m,:n), b(:m,:nrhs), x(:n,:nrhs), comp_resid=comp_resid, &
                         krank=krank, tol=tol, min_norm=min_norm, ip=ip(:n) )
!
!   llsq_qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES.
!
!   a AND b ARE OVERWRITTEN BY llsq_qr_solve2. THE OPTIONAL ARGUMENTS krank,
!   tol AND min_norm MUST NOT BE PRESENT IF a IS A m-ELEMENTS VECTOR.
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!   
!   BY DEFAULT, PIVOTING IS DONE ON ALL THE COLUMNS OF a.
!   
!   ON EXIT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED,
!   krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!   
!   ON EXIT, IF THE OPTIONAL INTEGER ARRAY ip IS PRESENT, ip STORES THE PERMUTATION MATRIX
!   P IN THE QR OR COMPLETE DECOMPOSITION OF a.
!   IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!   THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!   IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND IS OUTPUT IN b .
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve2 .
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true. OTHERWISE, SOLUTION(S) ARE COMPUTED SUCH THAT IF THE jTH COLUMN
!   OF a IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        err = maxval( sum( abs( matmul( transpose(b), a2 ) ), dim=2 ) )/ sum( abs(a2) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2 )
!
    end if
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, ip )
!
!
! END OF PROGRAM ex2_llsq_qr_solve2
! =================================
!
end program ex2_llsq_qr_solve2

ex2_llsq_svd_solve.F90

program ex2_llsq_svd_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_SVD_SOLVE
!   in module LLSQ_Procedures for solving linear least squares problems using an
!   SVD decomposition of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c500, lamch, norm,        &
                         print_array, llsq_svd_solve, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=2000, mn=min(m,n), nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c500
!
    character(len=*), parameter :: name_proc='Example 2 of llsq_svd_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd) :: err, err1, err2, ulp, eps, tol, anorm, cond, sfmin, elapsed_time
    real(stnd), allocatable, dimension(:)   :: sing_values, rnorm, bnorm
    real(stnd), allocatable, dimension(:,:) :: a, a2, b, b2, res, res2, x
!
    integer(i4b) :: krank, j
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, do_test, do_print
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVE A LINEAR LEAST SQUARES SYSTEM WITH ONE RIGHT HAND-SIDE
!               BY THE SINGULAR VALUE DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE
!   AND IF DETAILED RESULTS MUST BE PRINTED.
!
    do_test  = true
    do_print = false
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), x(n,nrhs), sing_values(mn), &
              bnorm(nrhs), rnorm(nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n REAL RANDOM COEFFICIENT MATRIX a .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    j = min( n, 5_i4b ) 
    a(:m,j) = a(:m,1_i4b) + a(:m,2_i4b)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b .
!
    call random_number( b )
!
!   COMPUTE THE NORMS OF THE nrhs DEPENDENT VARIABLES b .
!
    bnorm(:nrhs) = norm( b(:m,:nrhs), dim=2_i4b )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), b2(m,nrhs), res(m,nrhs), res2(nrhs,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX .
!
        a2(:m,:n) = a(:m,:n)
!
!       SAVE RIGHT HAND SIDE MATRIX .
!
        b2(:m,:nrhs) = b(:m,:nrhs)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   llsq_svd_solve COMPUTES THE MINIMUM NORM SOLUTION TO A REAL LINEAR LEAST
!   SQUARES PROBLEM :
!
!                       Minimize || b - a*x ||_2
!
!   USING THE SINGULAR VALUE DECOMPOSITION (SVD) OF a. A IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. b AND x CAN BE VECTORS OF MATRICES, BUT THEIR
!   SHAPES MUST BE CONFORMABLE WITH THE SHAPE OF a.
!
!   IN OTHER WORDS, IF b AND x ARE MATRICES, SEVERAL RIGHT HAND SIDE VECTORS b
!   AND SOLUTION VECTORS x CAN BE HANDLED IN A SINGLE CALL; THEY ARE STORED AS
!   THE COLUMNS OF THE m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX b, RESPECTIVELY.
!
!   THE EFFECTIVE RANK OF a, krank,IS DETERMINED BY TREATING AS ZERO THOSE
!   SINGULAR VALUES WHICH ARE LESS THAN tol TIMES THE LARGEST SINGULAR VALUE.
!
    call llsq_svd_solve( a, b, failure, x,                                            &
                         singvalues=sing_values, krank=krank, rnorm=rnorm, tol=tol )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUAL MATRIX IS ORTHOGONAL TO THE RANGE OF a .
!
        res(:m,:nrhs)  = b2(:m,:nrhs) - matmul( a2(:m,:n), x(:n,:nrhs) )
        res2(:nrhs,:n) = matmul( transpose(res(:m,:nrhs)), a2(:m,:n) )
!
        err1 = maxval( abs(res2(:nrhs,:n)) )/anorm
!
!       CHECK THE NORMS OF THE COLUMNS OF RESIDUAL MATRIX.
!
        err2 = maxval( abs( norm( res(:m,:nrhs), dim=2_i4b ) - rnorm(:nrhs) ) )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, b2, res, res2 )
!
    end if

    write (prtunit,*) err1, err2, eps, failure

!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!   GET MACHINE CONSTANT sfmin, SUCH THAT 1/sfmin DOES NOT OVERFLOW.
!
    sfmin = lamch( 's' )
!
!   COMPUTE THE CONDITION NUMBER OF a(:m,:n) IN THE 2-NORM
!
!            singvalues(1)/singvalues(min(m,n)) .
!
    if ( sing_values(mn)/sing_values(1_i4b)<=sfmin ) then
        cond = huge( cond )
    else
        cond = sing_values(1_i4b)/sing_values(mn)
    end if
!
!   PRINT RESULTS .
!
    write (prtunit,*)
    write (prtunit,*)
    write (prtunit,*) 'Least squares solution via Singular Value Decomposition'
    write (prtunit,*)
    write (prtunit,*) '    min of ||a(:,:)*x(:,:)-b(:,:)||**2 for matrix x(:,:) '
    write (prtunit,*)
    write (prtunit,*) 'Tolerance for zero singular values (tol*sing_values(1)):',tol*sing_values(1)
    write (prtunit,*)
    write (prtunit,*) 'Condition number (in the 2-norm) of a :',cond
    write (prtunit,*) 'Rank of a                             :',krank
    write (prtunit,*)
    write (prtunit,*) 'Residual sum of squares     ||a*x(:,i)-b(:,i)||**2               :',rnorm(:nrhs)**2
    write (prtunit,*) 'Residual sum of squares (%) ||a*x(:,i)-b(:,i)||**2/||b(:,i)||**2 :',(rnorm(:nrhs)/bnorm(:nrhs))**2
    write (prtunit,*)
!
    if ( do_print ) then
!
!       PRINT DETAILED RESULTS.
!
        call print_array( sing_values, title=' Singular values of a ' )
!
        write (prtunit,*)
!
        call print_array( x, title=' Least squares solution matrix x ' )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, sing_values, rnorm, bnorm )
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_llsq_svd_solve
! =================================
!
end program ex2_llsq_svd_solve

ex2_lu_cmp.F90

program ex2_lu_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines LU_CMP and LU_SOLVE
!   in module Lin_Procedures for solving a real linear system with several right hand sides.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, true, false, lu_cmp, lu_solve,    &
                         norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE SIZE OF THE LINEAR SYSTEM TO BE SOLVED
! AND nrhs IS THE NUMBER OF RIGHT HANDE SIDES.
!
    integer(i4b), parameter :: prtunit=6, n=3000, nrhs=3000
!
    character(len=*), parameter :: name_proc='Example 2 of lu_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, d1, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, b, x, res
!
    integer(i4b), dimension(:), allocatable :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)   :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND SEVERAL RIGHT HAND-SIDES WITH THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n,nrhs), x(n,nrhs), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
!   GENERATE A n-by-nrhs RANDOM SOLUTION MATRIX x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-MATRIX PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE SOLUTION MATRIX FOR LINEAR SYSTEM
!
!            a*x = b .
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a . IF ON OUTPUT OF lu_cmp
!   d1 IS DIFFERENT FROM ZERO THEN THE LINEAR SYSTEM IS NOT
!   SINGULAR AND CAN BE SOLVED BY SUBROUTINE lu_solve.
!
    call lu_cmp( a, ip, d1 )
!
    if ( d1==zero ) then
!
!       ANORMAL EXIT FROM lu_cmp SUBROUTINE, PRINT A WARNING.
!
        write (prtunit,*) 'Error in exit of LU_CMP subroutine, d1=', d1
!
    else
!
        call lu_solve( a, ip, b )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( d1/=zero .and. do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n,:nrhs) = b(:n,:nrhs) - x(:n,:nrhs)
        err = maxval( norm(res, dim=2_i4b ) /    &
                      norm(x,  dim=2_i4b  ) )/real(n,stnd)
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, ip, res )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, b, x, ip )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. d1/=zero ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing the solutions of a linear real system of size ', &
       n, ' with ', nrhs,' right hand sides is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_lu_cmp
! =========================
!
end program ex2_lu_cmp

ex2_partial_qr_cmp.F90

program ex2_partial_qr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_QR_CMP
!   and ORTHO_GEN_QR in module QR_Procedures for computing a full QR decomposition
!   with column pivoting of a real matrix.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50, partial_qr_cmp,   &
                         ortho_gen_qr, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX,
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, mn=min(m,n)
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of partial_qr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err1_col, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, resid2, norma
    real(stnd), allocatable, dimension(:,:) :: a, a2, r, resid
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : COMPUTE A FULL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    if ( do_test ) then
!
        l = max( m, n )
!
    else
!
        l = n
!
    end if
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,l), diagr(mn), beta(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), r(mn,n), resid(m,l), resid2(n), norma(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A FULL QR DECOMPOSITION WITH COLUMN PIVOTING OF RANDOM DATA MATRIX a
!   WITH SUBROUTINE partial_qr_cmp.
!
    call partial_qr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol )
!
!    call partial_qr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank )
!
!   partial_qr_cmp COMPUTES A (PARTIAL OR FULL) ORTHOGONAL FACTORIZATION OF A REAL
!   m-BY-n MATRIX. THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_qr_cmp.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT, THEN R22 IS CONSIDERED
!   TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY ORTHOGONAL TRANSFORMATIONS
!   FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, partial_qr_cmp COMPUTES ONLY
!   A QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_qr_cmp COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a
!   AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       RESTORE TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX r(:mn,:n) .
!
        do j = 1_i4b, mn
!
            r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
            r(j,j)             = diagr(j)
            r(j+1_i4b:mn,j)    = zero
!
        end do
!
        do j = mn+1_i4b, n
!
            r(1_i4b:mn,j) = a(1_i4b:mn,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!       a IS NOT ASSUMED OF FULL RANK.
!
        call ortho_gen_qr( a(:m,:m), beta(:krank) )
!
!       ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS A PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(k)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp AND partial_rqr_cmp2.
!
!       THE SIZE OF beta DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW a(:m,:krank) IS AN ORTHONORMAL BASIS FOR THE RANGE OF a AND a(:m,krank+1:m) IS
!       AN ORTHONORMAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!
!       APPLY PERMUTATION TO a .
!
        do j = 1_i4b, n
!
            a2(:m,j) = resid(:m,ip(j))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*P - Q(:m,:krank)*R(:krank,:n).
!
        resid(:m,:n) = a2(:m,:n) - matmul( a(:m,:krank), r(:krank,:n) )
        resid2(:n)   = norm( resid(:m,:n), dim=2_i4b )
        norma(:n)    = norm( a2(:m,:n), dim=2_i4b )
!
        err1_col     = maxval( resid2(:n) / norma(:n) )
        err1         = norm( resid2(:n) )/ norm( norma(:n) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q'*Q.
!
        call unit_matrix( resid(:m,:m) )
!
        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
        err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a2(:m,:krank)), a(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, r, resid, resid2, norma )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Estimated rank of the matrix            &
                          &                                        = ', krank
!
        if ( krank/=mn ) then
            write (prtunit,*) 'Indices of linearly dependent columns   &
                              &                                        = ', ip(krank+1:n)
        end if
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &||A - Q*R||/||A||                       = ', err1
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &max( ||A(:,i) - Q*R(:,i)||/||A(:,i)|| ) = ', err1_col
        write (prtunit,*) 'Orthogonality of the Q matrix           &
                          &                                        = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the matrix&
                              & and its orthogonal complement          = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip )
!
!
! END OF PROGRAM ex2_partial_qr_cmp
! =================================
!
end program ex2_partial_qr_cmp

ex2_partial_rqr_cmp.F90

program ex2_partial_rqr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_RQR_CMP
!   and ORTHO_GEN_QR in module QR_Procedures for computing a full randomized QR
!   decomposition with column pivoting of a real matrix.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50, partial_rqr_cmp,   &
                         ortho_gen_qr, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX,
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of partial_rqr_cmp'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err1_col, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, resid2, norma
    real(stnd), allocatable, dimension(:,:) :: a, a2, r, resid
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : COMPUTE A RANDOMIZED FULL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    if ( do_test ) then
!
        l = max( m, n )
!
    else
!
        l = n
!
    end if
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,l), diagr(mn), beta(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), r(mn,n), resid(m,l), resid2(n), norma(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED FULL QR DECOMPOSITION WITH COLUMN PIVOTING OF RANDOM DATA MATRIX a
!   WITH SUBROUTINE partial_rqr_cmp.
!
    call partial_rqr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol )
!
!    call partial_rqr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank )
!
!   partial_rqr_cmp COMPUTES A  RANDOMIZED (PARTIAL OR FULL) ORTHOGONAL FACTORIZATION OF A REAL
!   m-BY-n MATRIX. THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A RANDOMIZED FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_rqr_cmp.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT, THEN R22 IS CONSIDERED
!   TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY ORTHOGONAL TRANSFORMATIONS
!   FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, partial_rqr_cmp COMPUTES ONLY
!   A RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_rqr_cmp COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING
!   OF a AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       RESTORE TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX r(:mn,:n) .
!
        do j = 1_i4b, mn
!
            r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
            r(j,j)             = diagr(j)
            r(j+1_i4b:mn,j)    = zero
!
        end do
!
        do j = mn+1_i4b, n
!
            r(1_i4b:mn,j) = a(1_i4b:mn,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!       a IS NOT ASSUMED OF FULL RANK.
!
        call ortho_gen_qr( a(:m,:m), beta(:krank) )
!
!       ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS A PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(k)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp AND partial_rqr_cmp2.
!
!       THE SIZE OF beta DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW a(:m,:krank) IS AN ORTHONORMAL BASIS FOR THE RANGE OF a AND a(:m,krank+1:m) IS
!       AN ORTHONORMAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!
!       APPLY PERMUTATION TO a .
!
        do j = 1_i4b, n
!
            a2(:m,j) = resid(:m,ip(j))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*P - Q(:m,:krank)*R(:krank,:n).
!
        resid(:m,:n) = a2(:m,:n) - matmul( a(:m,:krank), r(:krank,:n) )
        resid2(:n)   = norm( resid(:m,:n), dim=2_i4b )
        norma(:n)    = norm( a2(:m,:n), dim=2_i4b )
!
        err1_col     = maxval( resid2(:n) / norma(:n) )
        err1         = norm( resid2(:n) )/ norm( norma(:n) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q'*Q.
!
        call unit_matrix( resid(:m,:m) )
!
        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
        err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a2(:m,:krank)), a(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, r, resid, resid2, norma )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Estimated rank of the matrix            &
                          &                                        = ', krank
!
        if ( krank/=mn ) then
            write (prtunit,*) 'Indices of linearly dependent columns   &
                              &                                        = ', ip(krank+1:n)
        end if
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &||A - Q*R||/||A||                       = ', err1
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &max( ||A(:,i) - Q*R(:,i)||/||A(:,i)|| ) = ', err1_col
        write (prtunit,*) 'Orthogonality of the Q matrix           &
                          &                                        = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the matrix&
                              & and its orthogonal complement          = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip )
!
!
! END OF PROGRAM ex2_partial_rqr_cmp
! ==================================
!
end program ex2_partial_rqr_cmp

ex2_partial_rqr_cmp2.F90

program ex2_partial_rqr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_RQR_CMP2
!   and ORTHO_GEN_QR in module QR_Procedures for computing a full randomized QR
!   decomposition with column pivoting of a real matrix.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, c50, partial_rqr_cmp2,   &
                         ortho_gen_qr, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!    
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit = 6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of partial_rqr_cmp2'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, err1_col, eps, ulp, tol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diagr, beta, resid2, norma
    real(stnd), allocatable, dimension(:,:) :: a, a2, r, resid
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, test_lin
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : COMPUTE A RANDOMIZED FULL QR DECOMPOSITION WITH COLUMN PIVOTING
!               OF A DATA MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = fudge*ulp
!
    err = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
    if ( do_test ) then
!
        l = max( m, n )
!
    else
!
        l = n
!
    end if
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,l), diagr(mn), beta(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), r(mn,n), resid(m,l), resid2(n), norma(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        resid(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED FULL QR DECOMPOSITION WITH COLUMN PIVOTING OF RANDOM DATA MATRIX a
!   WITH SUBROUTINE partial_rqr_cmp2.
!
    call partial_rqr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol )
!
!    call partial_rqr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank )
!
!   partial_rqr_cmp2 COMPUTES A  RANDOMIZED (PARTIAL OR FULL) ORTHOGONAL FACTORIZATION OF A REAL
!   m-BY-n MATRIX. THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A RANDOMIZED FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_rqr_cmp2.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT, THEN R22 IS CONSIDERED
!   TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY ORTHOGONAL TRANSFORMATIONS
!   FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, partial_rqr_cmp2 COMPUTES ONLY
!   A RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_rqr_cmp2 COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING
!   OF a AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       RESTORE TRIANGULAR FACTOR R OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       IN MATRIX r(:mn,:n) .
!
        do j = 1_i4b, mn
!
            r(1_i4b:j-1_i4b,j) = a(1_i4b:j-1_i4b,j)
            r(j,j)             = diagr(j)
            r(j+1_i4b:mn,j)    = zero
!
        end do
!
        do j = mn+1_i4b, n
!
            r(1_i4b:mn,j) = a(1_i4b:mn,j)
!
        end do
!
!       GENERATE ORTHOGONAL MATRIX Q OF QR DECOMPOSITION OF RANDOM DATA MATRIX a
!       AND AN ORTHOGONAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!       a IS NOT ASSUMED OF FULL RANK.
!
        call ortho_gen_qr( a(:m,:m), beta(:krank) )
!
!       ortho_gen_qr GENERATES AN m-BY-m REAL ORTHOGONAL MATRIX, WHICH IS
!       DEFINED AS A PRODUCT OF k ELEMENTARY REFLECTORS OF ORDER m
!
!            Q = h(1)*h(2)* ... *h(k)
!
!       AS RETURNED BY qr_cmp, qr_cmp2, partial_qr_cmp, partial_rqr_cmp AND partial_rqr_cmp2.
!
!       THE SIZE OF beta DETERMINES THE NUMBER k OF ELEMENTARY REFLECTORS
!       WHOSE PRODUCT DEFINES THE MATRIX Q.
!
!       NOW a(:m,:krank) IS AN ORTHONORMAL BASIS FOR THE RANGE OF a AND a(:m,krank+1:m) IS
!       AN ORTHONORMAL BASIS FOR THE ORTHOGONAL COMPLEMENT TO THE RANGE OF a .
!
!       APPLY PERMUTATION TO a .
!
        do j = 1_i4b, n
!
            a2(:m,j) = resid(:m,ip(j))
!
        end do
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*P - Q(:m,:krank)*R(:krank,:n).
!
        resid(:m,:n) = a2(:m,:n) - matmul( a(:m,:krank), r(:krank,:n) )
        resid2(:n)   = norm( resid(:m,:n), dim=2_i4b )
        norma(:n)    = norm( a2(:m,:n), dim=2_i4b )
!
        err1_col     = maxval( resid2(:n) / norma(:n) )
        err1         = norm( resid2(:n) )/ norm( norma(:n) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q'*Q.
!
        call unit_matrix( resid(:m,:m) )
!
        resid(:m,:m) = abs( resid(:m,:m) - matmul( transpose(a(:m,:m)), a(:m,:m) ) )
        err2 = maxval( resid(:m,:m) )/real(m,stnd)
!
!       CHECK ORTHOGONALITY OF (a*P)(:m,:krank) AND ITS ORTHOGONAL COMPLEMENT Q(:m,krank+1:m).
!
        if ( m>krank ) then
!
            resid(:krank,krank+1:m) = matmul( transpose(a2(:m,:krank)), a(:m,krank+1:m) )
            err3 = maxval( abs( resid(:krank,krank+1:m) ) )/real(m,stnd)
!
        else
!
            err3 = zero
!
        end if
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, r, resid, resid2, norma )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Estimated rank of the matrix            &
                          &                                        = ', krank
!
        if ( krank/=mn ) then
            write (prtunit,*) 'Indices of linearly dependent columns   &
                              &                                        = ', ip(krank+1:n)
        end if
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &||A - Q*R||/||A||                       = ', err1
!        
        write (prtunit,*) 'Accuracy of the QR decomposition        &
                          &max( ||A(:,i) - Q*R(:,i)||/||A(:,i)|| ) = ', err1_col
        write (prtunit,*) 'Orthogonality of the Q matrix           &
                          &                                        = ', err2
!
        if ( m>krank ) then
            write (prtunit,*) 'Orthogonality of the range of the matrix&
                              & and its orthogonal complement          = ', err3
        end if
!
    end if
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing a randomized QR decomposition with column pivoting of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, diagr, beta, ip )
!
!
! END OF PROGRAM ex2_partial_rqr_cmp2
! ===================================
!
end program ex2_partial_rqr_cmp2

ex2_permute_cor.F90

program ex2_permute_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and PERMUTE_COR
!   in module Mul_Stat_Procedures for performing permutation tests on a series of correlation
!   coefficients.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor, permute_cor, random_seed_, random_number_
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT,
! p       IS THE NUMBER OF OBSERVATIONS OF THE RANDOM VECTORS,
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PERMUTATION TEST,
! nsample IS THE NUMBER OF SAMPLES TO EVALUATE THE REJECTION RATE OF THE TEST.
!
    integer(i4b), parameter :: prtunit=6, p=50, nrep=99, nsample=3000
!
! sign_level IS THE SIGNIFICANCE LEVEL OF THE PERMUTATION TEST
! eps        IS THE ALLOWED RELATIVE ERROR FOR THE REJECTION RATE
!
    real(stnd),  parameter :: sign_level=0.05, eps=0.1
!
    character(len=*), parameter :: name_proc='Example 2 of permute_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                       :: xyn, err_prob, prob_rej_rate
    real(stnd), dimension(nsample)   :: xycor, prob
    real(stnd), dimension(nsample,2) :: xstat
    real(stnd), dimension(2)         :: ystat
    real(stnd), dimension(nsample,p) :: x
    real(stnd), dimension(p)         :: y
!
    integer(i4b) :: rej_rate
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   INITIALIZE THE RANDOM GENERATOR.
!
    call random_seed_()
!
!   GENERATE A RANDOM UNIFORM OBSERVATION VECTOR y .
!
    call random_number_( y(:p) )
!
!   GENERATE A RANDOM UNIFORM OBSERVATION ARRAY x .
!
    call random_number_( x(:nsample,:p) )
!
!   COMPUTE THE CORRELATIONS BETWEEN x AND y
!   FOR THE p OBSERVATIONS.
!
    first = true
    last  = true
    call comp_cor( x(:nsample,:p), y(:p), first, last, xstat(:nsample,:2), ystat(:2),    &
                   xycor(:nsample), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(:nsample,1)     CONTAINS THE MEAN VALUES OF THE OBSERVATION ARRAY x(:nsample,:p).
!
!      xstat(:nsample,2)     CONTAINS THE VARIANCES OF THE OBSERVATION ARRAY x(:nsample,:p).
!
!      ystat(1)              CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(:p).
!
!      ystat(2)              CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(:p).
!
!      xycor(:nsample)       CONTAINS THE CORRELATION COEFFICIENTS BETWEEN x(:nsample,:p) AND y(:p).
!
!      xyn                   CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAYS
!                            x(:nsample,:p) AND  y(:p) (xyn=real(p,stnd) ).
!
!
!   NOW COMPUTE A PERMUTATION TEST OF THE CORRELATION BETWEEN x AND y WITH
!   SUBROUTINE permute_cor WITH nrep SHUFFLES.
!
    call permute_cor( x(:nsample,:p), y(:p), xstat(:nsample,:2), ystat(:2),   &
                      xycor(:nsample), prob(:nsample), nrep=nrep )
!
!   NOW COMPUTE THE REJECTION RATE OF THE TEST. IDEALLY, FOR THE sign_level
!   SIGNIFICANCE LEVEL, WE WOULD REJECT ONLY THE NULL HYPOTHESIS sign_level %
!   OF THE TIME. IN OTHER WORDS, THE REJECTION RATE MUST BE APPROXIMATELY EQUAL
!   TO THE SIGNIFICANCE LEVEL sign_level .
!
    rej_rate      = count( prob(:nsample)<=sign_level )
    prob_rej_rate = real( rej_rate, stnd )/real( nsample, stnd )
!
!   COMPUTE THE RELATIVE ERROR OF THE REJECTION RATE.
!
    err_prob = abs( (prob_rej_rate-sign_level)/sign_level )
!
!   CHECK THAT THE RELATIVE ERROR OF THE REJECTION RATE IS LESS THAN eps .
!
    if ( err_prob<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_permute_cor
! ==============================
!
end program ex2_permute_cor

ex2_phase_scramble_cor.F90

program ex2_phase_scramble_cor
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines COMP_COR and PHASE_SCRAMBLE_COR
!   in module Mul_Stat_Procedures for performing phase-scrambled bootstrap tests on a series of
!   correlation coefficients.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, lgl, true, comp_cor, phase_scramble_cor, pinvn
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! p       IS THE NUMBER OF OBSERVATIONS OF THE TIME SERIES VECTORS;
! nrep    IS THE NUMBER OF SHUFFLES FOR THE PHASE-SCRAMBLED BOOTSTRAP TEST;
! nsample IS THE NUMBER OF SAMPLES TO EVALUATE THE REJECTION RATE OF THE TEST.
!
    integer(i4b), parameter :: prtunit=6, p=50, nrep=99, nsample=2000
!
! sign_level IS THE SIGNIFICANCE LEVEL OF PHASE-SCRAMBLED BOOTSTRAP TEST;
! eps        IS THE ALLOWED RELATIVE ERROR FOR THE REJECTION RATE;
! b          IS THE LAG-1 AUTOCORRELATION FOR THE AR(1) MODEL USED
!            TO GENERATE THE TIME SERIES
!
    real(stnd),  parameter :: sign_level=0.05, eps=0.2, b=0.2
!
    character(len=*), parameter :: name_proc='Example 2 of phase_scramble_cor'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                       :: xyn, err_prob, prob_rej_rate
    real(stnd), dimension(nsample)   :: xycor, prob
    real(stnd), dimension(nsample,2) :: xstat
    real(stnd), dimension(2)         :: ystat
    real(stnd), dimension(nsample,p) :: x, e
    real(stnd), dimension(p)         :: y, e2
!
    integer(i4b) :: j, rej_rate
!
    logical(lgl) :: first, last
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   INITIALIZE THE RANDOM GENERATOR.
!
    call random_seed()
!
!   GENERATE A TIME SERIES USING AN AR(1) MODEL OF THE FORM
!
!               y(i+1) = b*y(i) + e2(i)
!
!   WHERE b IS THE SPECIFIED LAG-1 AUTOCORRELATION AND e2(i)
!   IS A NORMALLY DISTRIBUTED RANDOM VARIABLE WITH A 0 MEAN
!   AND A VARIANCE OF 1.
!
    call random_number( y(:p) )
    e2(:p) = pinvn( y(:p) )
!
    y(1) = e2(1)
    do j=2, p
        y(j) = b*y(j-1) + e2(j)
    end do
!
!   GENERATE nsample INDEPENDENT TIME SERIES FROM THE SAME AR(1) MODEL.
!
    call random_number( x(:nsample,:p) )
    e(:nsample,:p) = pinvn( x(:nsample,:p) )
!
    x(:nsample,1) = e(:nsample,1)
    do j=2, p
        x(:nsample,j) = b*x(:nsample,j-1) + e(:nsample,j)
    end do
!
!   COMPUTE THE CORRELATIONS BETWEEN x AND y
!   FOR THE p OBSERVATIONS.
!
    first = true
    last  = true
!
    call comp_cor( x(:nsample,:p), y(:p), first, last, xstat(:nsample,:2), ystat(:2),    &
                   xycor(:nsample), xyn )
!
!   ON EXIT OF COMP_COR WHEN last=true :
!
!      xstat(:nsample,1)     CONTAINS THE MEAN VALUES OF THE OBSERVATION ARRAY x(:nsample,:p).
!
!      xstat(:nsample,2)     CONTAINS THE VARIANCES OF THE OBSERVATION ARRAY x(:nsample,:p).
!
!      ystat(1)              CONTAINS THE MEAN VALUE OF THE OBSERVATION VECTOR  y(:p).
!
!      ystat(2)              CONTAINS THE VARIANCE OF THE OBSERVATION VECTOR  y(:p).
!
!      xycor(:nsample)       CONTAINS THE CORRELATION COEFFICIENTS BETWEEN x(:nsample,:p) AND y(:p).
!
!      xyn                   CONTAINS THE NUMBER OF OBSERVATIONS IN THE DATA ARRAYS
!                            x(:nsample,:p) AND  y(:p) (xyn=real(p,stnd) ).
!
!
!   NOW COMPUTE A THE PHASE-SCRAMBLED BOOTSTRAP TEST OF THE CORRELATION
!   BETWEEN x AND y WITH SUBROUTINE phase_scramble_cor WITH nrep SHUFFLES.
!
    call phase_scramble_cor( x(:nsample,:p), y(:p), xstat(:nsample,:2), ystat(:2),   &
                             xycor(:nsample), prob(:nsample), nrep=nrep )
!
!   NOW COMPUTE THE REJECTION RATE OF THE TEST. IDEALLY, FOR THE sign_level
!   SIGNIFICANCE LEVEL, WE WOULD REJECT ONLY THE NULL HYPOTHESIS sign_level %
!   OF THE TIME. IN OTHER WORDS, THE REJECTION RATE MUST BE APPROXIMATELY EQUAL
!   TO THE SIGNIFICANCE LEVEL sign_level .
!
    rej_rate      = count( prob(:nsample)<=sign_level )
    prob_rej_rate = real( rej_rate, stnd )/real( nsample, stnd )
!
!   COMPUTE THE RELATIVE ERROR OF THE REJECTION RATE.
!
    err_prob = abs( (prob_rej_rate-sign_level)/sign_level )
!
!   CHECK THAT THE RELATIVE ERROR OF THE REJECTION RATE IS LESS THAN eps .
!
    if ( err_prob<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_phase_scramble_cor
! =====================================
!
end program ex2_phase_scramble_cor

ex2_probq.F90

program ex2_probq
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBQ, PINVQ
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, one, c99, probq, pinvq
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-3_stnd 
!
    character(len=*), parameter :: name_proc='Example 2 of probq'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x2
    real(stnd)                 :: err
!
    integer(i4b), dimension(n,m) :: ndf
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF CHI-SQUARED DISTRIBUTION .
!
    call random_number( x2(:n,:m) )
!
    x2(:n,:m) = c99*x2(:n,:m)
    where( x2(:n,:m)<one )  x2(:n,:m) = x2(:n,:m) + one
!
    ndf(:n,:m) = x2(:n,:m)
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:m,:n) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES x2(:m,:n) OF CHI-SQUARED DISTRIBUTION WITH ndf DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:m,:n) .
!
    x2(:n,:m) = pinvq( p(:n,:m), ndf(:n,:m) )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probq FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probq( x2(:n,:m), ndf(:n,:m), upper=upper )
!
!   CHECK THAT p(:n,:m) AND p2(:n,:m) AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_probq
! ========================
!
end program ex2_probq

ex2_probq2.F90

program ex2_probq2
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBQ2, PINVQ2
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 27/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, one, c99, probq2, pinvq2
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 2 of probq2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, x2, df
    real(stnd)                 :: err
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF CHI-SQUARED DISTRIBUTION .
!   HERE df(:,:) ARE NOT NECESSARILY INTEGERS.
!
    call random_number( df(:n,:m) )
!
    df(:n,:m) = c99*df(:n,:m) + one
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:m,:n) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES x2(:m,:n) OF CHI-SQUARED DISTRIBUTION WITH df(:m,:n) DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:m,:n) .
!
    x2(:n,:m) = pinvq2( p(:n,:m), df(:n,:m) )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probq FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probq2( x2(:n,:m), df(:n,:m), upper=upper )
!
!   CHECK THAT p(:n,:m) AND p2(:n,:m) AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_probq2
! =========================
!
end program ex2_probq2

ex2_probstudent.F90

program ex2_probstudent
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBSTUDENT, PINVSTUDENT
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, stnd, false, two, c99, probstudent, pinvstudent
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-3_stnd 
!
    character(len=*), parameter :: name_proc='Example 2 of probstudent'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, t, df
    real(stnd)                 :: err
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF STUDENT T-DISTRIBUTION .
!
    call random_number( df(:n,:m) )
!
    df(:n,:m) = c99*df(:n,:m)
    where( df(:n,:m)<two )  df(:n,:m) = df(:n,:m) + two
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:n,:m) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE TWO-TAIL QUANTILES t OF STUDENT T-DISTRIBUTION 
!   WITH df DEGREES OF FREEDOM CORRESPONDING TO AREAS OF p(:n,:m) .
!
    t(:n,:m) = pinvstudent( p(:n,:m), df(:n,:m) )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probstudent FUNCTION.
!
    p2(:n,:m) = probstudent( t(:n,:m), df(:n,:m) )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_probstudent
! ==============================
!
end program ex2_probstudent

ex2_probt.F90

program ex2_probt
!
!
! Purpose
! =======
!
!   This program is intended to illustrate the use of functions PROBT, PINVT
!   in module Prob_Procedures .
!                                                                              
!                                                                              
! LATEST REVISION : 26/02/2013
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, false, one, c99, probt, pinvt
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT
!
    integer(i4b), parameter :: prtunit=6, n=5000, m=10000
!
    real(stnd), parameter :: eps = 1.0e-4_stnd 
!
    character(len=*), parameter :: name_proc='Example 2 of probt'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd), dimension(n,m) :: p, p2, t
    real(stnd)                 :: err
!
    integer(i4b), dimension(n,m) :: ndf
!
    logical(lgl) :: upper
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   GENERATE THE DEGREES OF FREEDOM OF STUDENT T-DISTRIBUTION .
!
    call random_number( t(:n,:m) )
!
    t(:n,:m) = c99*t(:n,:m)
    where( t(:n,:m)<one )  t(:n,:m) = t(:n,:m) + one
!
    ndf(:n,:m) = t(:n,:m)
!
!   GENERATE A RANDOM PROBABILITY MATRIX p(:,:) .
!
    call random_number( p(:n,:m) )
!
!   COMPUTE THE QUANTILES t(:,:) OF STUDENT T-DISTRIBUTION WITH ndf(:,:) DEGREES OF FREEDOM
!   CORRESPONDING TO LOWER TAIL AREAS OF p(:,:) .
!
    t(:n,:m) = pinvt( p(:n,:m), ndf(:n,:m) )
!
!   RECOMPUTE THE PROBABILITIES FROM THE QUANTILES
!   WITH probt FUNCTION.
!
    upper = false
!
    p2(:n,:m) = probt( t(:n,:m), ndf(:n,:m), upper=upper )
!
!   CHECK THAT p AND p2 AGREE.
!
    err = maxval( abs( p(:n,:m) - p2(:n,:m) ) )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
!
! END OF PROGRAM ex2_probt
! ========================
!
end program ex2_probt

ex2_qr_cmp.F90

program ex2_qr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QR_CMP in module QR_Procedures
!   and QR_SOLVE in module LLSQ_Procedures for solving linear least squares problems of full
!   rank using a QR decomposition.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c100, allocate_error, merror, &
                         qr_cmp, qr_solve, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 2 of qr_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION OF THE COEFFICIENT MATRIX. THE COEFFICIENT
!               MATRIX IS ASSUMED OF FULL RANK, BUT BOTH m>=n OR m<n ARE PERMITTED.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A QR FACTORIZATION OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp.
!
    call qr_cmp( a(:m,:n), diagr(:mn), beta(:mn) )
!
!   qr_cmp COMPUTES AN ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX
!   a . a IS ASSUMED OF FULL RANK. THE ROUTINE COMPUTES A QR FACTORIZATION
!   OF a AS:
!
!                     a = Q * R
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS ORTHOGONAL
!   FACTORIZATION. 
!
!   THE MATRIX Q IS REPRESENTED AS A PRODUCT OF ELEMENTARY REFLECTORS
!
!            Q = h(1)*h(2)* ... *h(k), WHERE k = min( size(a,1) , size(a,2) )
!
!   EACH h(i) HAS THE FORM
!
!            h(i) = I + BETA * ( V * V' ) ,
!                      
!   WHERE BETA IS A REAL SCALAR AND V IS A REAL m-ELEMENTS VECTOR WITH V(1:i-1) = 0.
!   V(i:m) IS STORED ON EXIT IN a(i:m,i) AND BETA IN beta(i).
!                      
!   THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!   CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX R. THE ELEMENTS
!   OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr. 
!
!   NOW, COMPUTE SOLUTION AND RESIDUAL VECTORS FOR LINEAR LEAST SQUARES SYSTEM
!   WITH SUBROUTINE qr_solve.
!
    call qr_solve( a(:m,:n), diagr(:mn), beta(:mn), b(:m), x(:n),  &
                   rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve SOLVES OVERDETERMINED OR UNDERDETERMINED REAL LINEAR SYSTEMS
!
!                a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH AN m-BY-n COEFFICIENT MATRIX a, USING AN ORTHOGONAL FACTORIZATION OF a, AS 
!   COMPUTED BY qr_cmp. m>=n OR n>m IS PERMITTED, BUT a IS ASSUMED OF FULL RANK.
!
!   b IS A m-ELEMENTS RIGHT HAND SIDE VECTOR AND x IS A n-ELEMENTS SOLUTION VECTOR. SEVERAL
!   RIGHT HAND SIDES AND SOLUTION VECTORS MAY BE HANDLE IN A SINGLE CALL. IN THIS CASE,
!   b IS AN m-BY-nrhs MATRIX AND x is AN n-BY-nrhs MATRIX. b IS OVERWRITTEN BY qr_solve.
!   
!   IT IS ASSUMED THAT qr_cmp  HAS BEEN USED TO COMPUTE THE ORTHOGONAL 
!   FACTORIZATION OF a BEFORE CALLING qr_solve.
!
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR FOR THE SOLUTION VECTOR x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL REAL PARAMETER rnorm IN THE CALL OF qr_solve .
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) '2-norm of residual vector ||a*x-b|| = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta )
!
!
! END OF PROGRAM ex2_qr_cmp
! =========================
!
end program ex2_qr_cmp

ex2_qr_cmp2.F90

program ex2_qr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QR_CMP2 in module QR_Procedures
!   and QR_SOLVE2 in module LLSQ_Procedures for solving full or deficient linear least squares
!   problems using a QR decomposition with column pivoting.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c100, allocate_error, merror, &
                         qr_cmp2, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 2 of qr_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b)                            :: krank, j, l, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION WITH COLUMN PIVOTING OF THE COEFFICIENT
!               MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED AND
!   IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    krank = 0
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A QR DECOMPOSITION WITH COLUMN PIVOTING OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp2.
!
    call qr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol )
!
!   qr_cmp2 COMPUTES A QR OR COMPLETE ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX.
!   THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE COMPUTES A QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF qr_cmp2.
!
!   IN OTHER WORDS, ON EXIT, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON INPUT, IF krank=k, THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED
!   INTO THE BASIS. PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a
!       ( IN THE 1-NORM) ARE PERFORMED. tol IS THEN USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol. IF tol=0 IS SPECIFIED
!       THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   FURTHERMORE, IF tol IS PRESENT AND IS IN [0,1[ IN THE CALL OF qr_cmp2, THE CONDITION
!   NUMBER OF a IS COMPUTED IN ALL CASES AND ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER
!   IS RETURNED IN tol.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT IN THE CALL OF qr_cmp2,
!   THEN R22 IS CONSIDERED TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY
!   ORTHOGONAL TRANSFORMATIONS FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL
!   FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, qr_cmp2 COMPUTES ONLY
!   A FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G., THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, qr_cmp2 COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a
!   AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!                      
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!                      
!
!   NEXT, COMPUTE THE SOLUTION VECTOR FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH SUBROUTINE qr_solve2 AND THE QR DECOMPOSITION COMPUTED BY qr_cmp2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m), x(:n),  &
                    rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING A QR FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a COMPUTED BY qr_cmp2. a IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. m>=n OR n>m IS PERMITTED.
!
!   HERE, qr_solve2 COMPUTES SOLUTION(S) FROM THE QR FACTORIZATION WITH COLUMNS
!   PIVOTING COMPUTED BY qr_cmp2.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND IS OUTPUT IN b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF qr_cmp2 AND qr_solve2 SUBROUTINES. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vector ||a*x-b||      = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, ip )
!
!
! END OF PROGRAM ex2_qr_cmp2
! ==========================
!
end program ex2_qr_cmp2

ex2_select_eigval_cmp.F90

program ex2_select_eigval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP
!   in module Eig_Procedures.
!                                                                              
! LATEST REVISION : 20/01/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, triangle, select_eigval_cmp, trid_inviter, &
                         merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX AND
! m IS THE NUMBER OF THE COMPUTED EIGENVALUES/EIGENVECTORS
!
    integer(i4b), parameter :: prtunit=6, n=1000, p=n*(n+1)/2, m=10
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of select_eigval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: d, res2, vec
    real(stnd), dimension(:,:), allocatable :: a, res, eigvec, d_e
!
    integer(i4b) :: maxiter=2
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small, upper=true
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX IN PACKED FORM USING A RATIONAL QR ALGORITHM FOR THE EIGENVALUES
!               AND THE INVERSE ITERATION TECHNIQUE FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
!
    err = zero
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE m SMALLEST OR LARGEST EIGENVALUES.
!
    small = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), vec(p), eigvec(n,m), d(m), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
    a = a + transpose( a ) 
!
!   MAKE A COPY OF THE UPPER TRIANGLE OF SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE FIRST m EIGENVALUES OF THE SELF-ADJOINT MATRIX a (IN
!   PACKED FORM) AND SAVE THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    if ( upper ) then
!
!       USE A FAST ALGORITHM.
!
        call select_eigval_cmp( vec(:p), d(:m), small, failure, d_e=d_e )
!
    else
!
!       USE A SLOW ALGORITHM.
!
        call select_eigval_cmp( vec(:p), d(:m), small, failure, upper=upper, d_e=d_e )
!
    end if
!
    if ( .not. failure ) then
!
!       COMPUTE THE ASSOCIATED m EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), d(:m), eigvec(:n,:m), failure2,     &
                           matp=vec(:p), maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(n,m), res2(m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        res(:n,:m) = matmul( a(:n,:n), eigvec(:n,:m)) - eigvec(:n,:m)*spread( d(:m), dim=1, ncopies=n)
        res2(:m) = norm( res(:n,:m), dim=2_i4b )
!
        err1 = maxval( res2(:m) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:m,:m) )
!
        res(:m,:m) = abs( a(:m,:m) - matmul( transpose(eigvec(:n,:m)), eigvec(:n,:m) ) )
!
        err2 = maxval( res(:m,:m) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, res2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, vec, eigvec, d_e, d )
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test  .and. .not.failure ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing ', m, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_eigval_cmp
! ====================================
!
end program ex2_select_eigval_cmp

ex2_select_eigval_cmp2.F90

program ex2_select_eigval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP2
!   in module Eig_Procedures.
!                                                                              
! LATEST REVISION : 20/01/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, four, c50, allocate_error,    &
                         triangle, merror, get_diag, select_eigval_cmp2, trid_inviter,    &
                         norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX 
!
    integer(i4b), parameter :: prtunit=6, n=1000, p=n*(n+1)/2
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of select_eigval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, val, elapsed_time
    real(stnd), dimension(:),   pointer     :: d
    real(stnd), dimension(:),   allocatable :: res2, vec
    real(stnd), dimension(:,:), allocatable :: a, res, eigvec, d_e
!
    integer(i4b) :: m, maxiter=4
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test, small, upper=true
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX IN PACKED FORM USING A RATIONAL QR ALGORITHM FOR THE EIGENVALUES
!               AND THE INVERSE ITERATION TECHNIQUE FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
!
    err = zero
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE m SMALLEST OR LARGEST EIGENVALUES.
!
    small = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), vec(p), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SEMI-POSITIVE MATRIX a .
!
    call random_number( a )
    a = matmul( a, transpose( a ) )
!
!   DETERMINE TRESHOLD FOR THE SUM OF THE EIGENVALUES.
!
    val = sum( get_diag(a) )/four
!
!   MAKE A COPY OF THE UPPER TRIANGLE OF SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE LARGEST m EIGENVALUES OF THE SELF-ADJOINT MATRIX a (IN PACKED FORM)
!   IN ALEBRAIC VALUE WHOSE SUM EXCEEDS val AND SAVE THE INTERMEDIATE TRIDIAGONAL
!   MATRIX IN PARAMETER d_e .
!
    if ( upper ) then
!
!       USE A FAST ALGORITHM.
!
        call select_eigval_cmp2( vec, d, small, val, failure, d_e=d_e )
!
    else
!
!       USE A SLOW ALGORITHM.
!
        call select_eigval_cmp2( vec, d, small, val, failure, upper=upper, d_e=d_e )
!
    end if
!
!   DETERMINE THE NUMBER OF EIGENVALUES AND ALLOCATE WORK ARRAY FOR
!   COMPUTING THE ASSOCIATED EIGENVECTORS.
!
    m = size( d )
!
    allocate( eigvec(n,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
    if ( .not. failure ) then
!
!       COMPUTE THE ASSOCIATED m EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), d(:m), eigvec(:n,:m), failure2,     &
                           matp=vec(:p), maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(n,m), res2(m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        res(:n,:m) = matmul( a(:n,:n), eigvec(:n,:m)) - eigvec(:n,:m)*spread( d(:m), dim=1, ncopies=n)
        res2(:m) = norm( res(:n,:m), dim=2_i4b )
!
        err1 = maxval( res2(:m) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:m,:m) )
!
        res(:m,:m) = abs( a(:m,:m) - matmul( transpose(eigvec(:n,:m)), eigvec(:n,:m) ) )
!
        err2 = maxval( res(:m,:m) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, res2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, vec, eigvec, d_e, d )
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test  .and. .not.failure ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing ', m, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_eigval_cmp2
! =====================================
!
end program ex2_select_eigval_cmp2

ex2_select_eigval_cmp3.F90

program ex2_select_eigval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_EIGVAL_CMP3
!   in module Eig_Procedures.
!                                                                              
! LATEST REVISION : 20/01/2020
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, triangle, select_eigval_cmp3,  &
                         trid_inviter, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX AND
! le IS THE NUMBER OF THE WANTED EIGENVALUES/EIGENVECTORS
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=n*(n+1)/2, le=100
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of select_eigval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, eps, elapsed_time
    real(stnd), dimension(:),   allocatable :: eigval, res2, vec
    real(stnd), dimension(:,:), allocatable :: a, res, eigvec, d_e
!
    integer(i4b) :: maxiter=4, neig
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl)     :: failure, failure2, do_test, small, upper=true
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : PARTIAL EIGENVALUES/EIGENVECTORS DECOMPOSITION OF A n-BY-n REAL
!               SYMMETRIC MATRIX IN PACKED FORM USING A BISECTION ALGORITHM FOR THE EIGENVALUES
!               AND THE INVERSE ITERATION TECHNIQUE FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
!
    err = zero
    do_test = true
!
!   DETERMINE IF YOU WANT TO COMPUTE THE m SMALLEST OR LARGEST EIGENVALUES.
!
    small = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), vec(p), eigval(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
    a = a + transpose( a ) 
!
!   MAKE A COPY OF THE UPPER TRIANGLE OF SELF-ADJOINT MATRIX IN PACKED FORM.
!
    vec(:p) = pack( a, mask=triangle( upper, n, n, extra=1_i4b) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE FIRST m EIGENVALUES OF THE SELF-ADJOINT MATRIX a (IN
!   PACKED FORM) AND SAVE THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e .
!
    if ( upper ) then
!
!       USE A FAST ALGORITHM.
!
        call select_eigval_cmp3( vec(:p), neig, eigval, small, failure, sort=sort, le=le, d_e=d_e )
!
    else
!
!       USE A SLOW ALGORITHM.
!
        call select_eigval_cmp3( vec(:p), neig, eigval, small, failure, upper=upper, sort=sort, le=le, d_e=d_e )
!
    end if
!
    if ( .not. failure .and. neig>0  ) then
!
!       ALLOCATE WORK ARRAY NEEDED TO STORE THE EIGENVECTORS.
!
        allocate( eigvec(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE ASSOCIATED neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!       maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION.
!
        call trid_inviter( d_e(:n,1), d_e(:n,2), eigval(:neig), eigvec(:n,:neig), failure2,     &
                           matp=vec(:p), maxiter=maxiter )
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. .not.failure .and. neig>0 ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( res(n,neig), res2(neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*eigvec - eigvec*d
!       WHERE d ARE THE EIGENVALUES AND eigvec THE EIGENVECTORS OF a.
!
        res(:n,:neig) = matmul( a(:n,:n), eigvec(:n,:neig)) - eigvec(:n,:neig)*spread( eigval(:neig), dim=1, ncopies=n)
        res2(:neig) = norm( res(:n,:neig), dim=2_i4b )
!
        err1 = maxval( res2(:neig) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - eigvec**(t)*eigvec.
!
        call unit_matrix( a(:neig,:neig) )
!
        res(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose(eigvec(:n,:neig)), eigvec(:n,:neig) ) )
!
        err2 = maxval( res(:neig,:neig) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( res, res2 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, vec, d_e, eigval )
!
    if ( allocated( eigvec ) )  deallocate( eigvec )
!
    if ( err<=eps .and. .not.failure  .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test  .and. .not.failure .and. neig>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues/eigenvectors couplets = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors                 = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_eigval_cmp3
! =====================================
!
end program ex2_select_eigval_cmp3

ex2_select_singval_cmp.F90

program ex2_select_singval_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using
!   the Chan-Golub-Reinsch bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and a (Godunov) deflation method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, bisection, dqds and deflation algorithms
!   are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c1_5, c50, safmin, bd_deflate2,   &
                         select_singval_cmp, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! ls IS THE NUMBER OF THE TOP SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, mn=min(m,n), ls=3000, max_qr_steps=4_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, leftvec, rightvec, resid, rla
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, tauq, taup, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing
!
    logical(lgl) :: failure1, failure2, ortho, gen_q, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE CHAN-GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A BISECTION OR DQDS ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = true
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING SINGULAR VECTORS. IF max(n,m)
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR OR QL IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR OR
!   QL FACTORIZATION IS GENERATED AND THE SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( mn, stnd )*c0_9, i4b )
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauq(mn), taup(mn), stat=iok )
        else
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauo(mn), tauq(mn), taup(mn), stat=iok )
        end if
    else
        allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), &
                  taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,mn), resid2(mn), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE GOLUB AND REINSCH
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ).
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR OR LQ FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, rla, tauo, tauq AND taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e,  &
                                     ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
        else
!
            call select_singval_cmp( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                     ls=ls, abstol=abstol, tauo=tauo, tauq=tauq, taup=taup,   &
                                     dqds=dqds )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN a, tauq, taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,   &
                                 ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
    end if
!
!   ON EXIT OF select_singval_cmp :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e
!   ARE STORED IN PACKED FORM IN a, tauq, taup AND ALSO rla AND tauo IF A
!   TWO-STAGE ALGORITHM HAS BEEN USED.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), &
                                  d(:mn), e(:mn), s(:nsing),                    &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                                  failure=failure2, ortho=ortho,                &
                                  max_qr_steps=max_qr_steps                     )
!
            else
!
                call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), &
                                  d(:mn), e(:mn), s(:nsing),                    &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                                  failure=failure2, tauo=tauo(:mn),             &
                                  ortho=ortho, max_qr_steps=max_qr_steps        )
!
            end if
!
        else
!
            call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                              leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                              ortho=ortho, max_qr_steps=max_qr_steps                       )
!
        end if
!
!       ON EXIT OF bd_deflate2 :
!
!           failure= false :  INDICATES SUCCESSFUL EXIT OF THE DEFLATION ALGORITHM
!           failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!           leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!           SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nsing) - u(:n,:nsing)*s(:nsing,:nsing),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        resid2(:nsing) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( resid2(:nsing) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nsing)**(t)*u(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, tauq, taup, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, tauq, taup )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( rla )
        else
            deallocate( rla, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_singval_cmp() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() )        = ', failure2
!
    if ( do_test .and. nsing>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp
! =====================================
!
end program ex2_select_singval_cmp

ex2_select_singval_cmp2.F90

program ex2_select_singval_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP2
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix
!   using the Chan-Golub-Reinsch bidiagonal reduction algorithm, a bisection or dqds algorithm
!   for singular values and a (Godunov) deflation method for singular vectors.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Golub-Reinsch bidiagonal reduction, bisection, dqds and deflation algorithms
!   are used.
!
!   Subroutine SELECT_SINGVAL_CMP2 is faster than SELECT_SINGVAL_CMP for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c1_5, c50, safmin, bd_deflate2,   &
                         select_singval_cmp2, merror, allocate_error, norm, unit_matrix
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! ls IS THE NUMBER OF THE TOP SINGULAR TRIPLETS WHICH MUST BE COMPUTED.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, mn=min(m,n), ls=3000, max_qr_steps=4_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, leftvec, rightvec, resid, rla
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, tauq, taup, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing
!
    logical(lgl) :: failure1, failure2, gen_q, two_stage, dqds, ortho, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX USING THE CHAN-GOLUB-REINSCH
!               BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR
!               SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = true
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF max(n,m) EXCEEDS mnthr ,
!   A QR OR LQ FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( mn, stnd )*c1_5, i4b )
!
    two_stage = max( n, m )>=mnthr
!    two_stage = true
    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING SINGULAR VECTORS. IF max(n,m)
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR OR QL IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR OR
!   QL FACTORIZATION IS GENERATED AND THE SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( mn, stnd )*c0_9, i4b )
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauq(mn), taup(mn), stat=iok )
        else
            allocate( a(n,m), rla(mn,mn), s(mn), d(mn), e(mn), &
                      tauo(mn), tauq(mn), taup(mn), stat=iok )
        end if
    else
        allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), &
                  taup(mn), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,mn), resid2(mn), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE GOLUB AND REINSCH
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp2.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp2 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp2 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
!   select_singval_cmp2 IS FASTER THAN select_singval_cmp, BUT MAY BE LESS ACCURATE FOR SOME
!   MATRICES IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR OR LQ FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, rla, tauo, tauq AND taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp2( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
        else
!
            call select_singval_cmp2( a, rla, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, tauq=tauq, taup=taup,   &
                                      dqds=dqds )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM
!       ARE STORED IN a, tauq, taup.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp2( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,   &
                                  ls=ls, abstol=abstol, tauq=tauq, taup=taup, dqds=dqds )
!
    end if
!
!   ON EXIT OF select_singval_cmp2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS
!   IS REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e
!   ARE STORED IN PACKED FORM IN a, tauq, taup AND ALSO rla AND tauo IF A
!   TWO-STAGE ALGORITHM HAS BEEN USED.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp2.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
                call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), &
                                  d(:mn), e(:mn), s(:nsing),                    &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                                  failure=failure2, ortho=ortho,                &
                                  max_qr_steps=max_qr_steps                     )
!
            else
!
                call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), rla(:mn,:mn), &
                                  d(:mn), e(:mn), s(:nsing),                    &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                                  failure=failure2, tauo=tauo(:mn),             &
                                  ortho=ortho, max_qr_steps=max_qr_steps        )
!
            end if
!
        else
!
            call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                              leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                              ortho=ortho, max_qr_steps=max_qr_steps                       )
!
        end if
!
!       ON EXIT OF bd_deflate2 :
!
!           failure= false :  INDICATES SUCCESSFUL EXIT OF THE DEFLATION ALGORITHM
!           failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!           leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!           SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        resid2(:nsing) = norm( resid(:n,:nsing), dim=2_i4b )
!
        err1 = maxval( resid2(:nsing) )/ ( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
!
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( leftvec, rightvec )
!
    end if
!
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( a, rla, s, d, e, tauq, taup )
        else
            deallocate( a, rla, s, d, e, tauo, tauq, taup )
        end if
!
    else
!
        deallocate( a, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from select_singval_cmp2() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp2
! ======================================
!
end program ex2_select_singval_cmp2

ex2_select_singval_cmp3.F90

program ex2_select_singval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP3
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using a
!   Chan-Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and a (Godunov) deflation method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and deflation
!   algorithms are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c1_5, c50, safmin, bd_deflate2,  &
                         select_singval_cmp3, merror, allocate_error, norm, unit_matrix, ifirstloc
!         
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, ls=3000, max_qr_steps=3_i4b
!
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, ortho, gen_p, gen_q, reortho, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = true
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF n EXCEEDS mnthr ,
!   A QR FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( m, stnd )*c1_5, i4b )
!
    two_stage = n>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING LEFT SINGULAR VECTORS. IF n
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR
!   FACTORIZATION IS GENERATED AND THE LEFT SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), ra(m,m), s(m), d(m),    &
                      e(m), p(m,m), stat=iok )
        else
            allocate( a(n,m), ra(m,m), s(m), d(m),    &
                      e(m), tauo(m), p(m,m), stat=iok )
        end if
    else
        allocate( a(n,m), s(m), d(m), e(m), &
                  p(m,m), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp3.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp3 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp3 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ) IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, ra, tauo AND p.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp3( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e,  &
                                      ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                      dqds=dqds, failure_bd=failure_bd )
!
        else
!
            call select_singval_cmp3( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, p=p, gen_p=gen_p,      &
                                      reortho=reortho, dqds=dqds, failure_bd=failure_bd )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp3( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                                  ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                  dqds=dqds, failure_bd=failure_bd )
!
    end if
!
!   ON EXIT OF select_singval_cmp3 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION ALGORITHM
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp3.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
               call bd_deflate2( a(:n,:m), ra(:m,:m), p(:m,:m),            &
                                 d(:m), e(:m), s(:nsing),                  &
                                 leftvec(:n,:nsing), rightvec(:m,:nsing),  &
                                 failure=failure2, ortho=ortho,            &
                                 max_qr_steps=max_qr_steps                 )
!
            else
!
                call bd_deflate2( a(:n,:m), ra(:m,:m), p(:m,:m),            &
                                  d(:m), e(:m), s(:nsing),                  &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),  &
                                  failure=failure2, tauo=tauo(:m),          &
                                  ortho=ortho, max_qr_steps=max_qr_steps    )
!
            end if
!
        else
!
            call bd_deflate2( a(:n,:m), p(:m,:m), d(:m), e(:m), s(:nsing),  &
                              leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                              failure=failure2, ortho=ortho,                &
                              max_qr_steps=max_qr_steps                     )
!
        end if
!
!       ON EXIT OF bd_deflate2 :
!
!           failure= false :  INDICATES SUCCESSFUL EXIT IN THE DEFLATION ALGORITHM
!           failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!           leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!           SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( ra )
        else
            deallocate( ra, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp3() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_deflate2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp3
! ======================================
!
end program ex2_select_singval_cmp3

ex2_select_singval_cmp3_bis.F90

program ex2_select_singval_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP3
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using the
!   Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and a (Godunov) deflation method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and deflation
!   algorithms are used.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!
! ================================================================================================
!
!
! USED MODULES
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c50, safmin, bd_deflate2,        &
                         select_singval_cmp3, merror, allocate_error, norm, unit_matrix, ifirstloc
!         
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, ls=3000, max_qr_steps=4_i4b
!
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr_nsing, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, ortho, gen_p, reortho, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = true
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(m), d(m), e(m), &
              p(m,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G., A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   GOLUB-KAHAN TRIDIAGONAL FORM OF THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE
!   RESULTS WITH SUBROUTINE select_singval_cmp3.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp3 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp3 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ) IF BISECTION IS USED.
!
    call select_singval_cmp3( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                              ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                              dqds=dqds, failure_bd=failure_bd )
!
!   ON EXIT OF select_singval_cmp3 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM.
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp3.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        call bd_deflate2( a(:n,:m), p(:m,:m), d(:m), e(:m), s(:nsing),      &
                          leftvec(:n,:nsing), rightvec(:m,:nsing),          &
                          failure=failure2, ortho=ortho,                    &
                          max_qr_steps=max_qr_steps                         )
!
!       ON EXIT OF bd_deflate2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT IN THE DEFLATION ALGORITHM.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!       leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!       SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp3() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_deflate2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp3
! ======================================
!
end program ex2_select_singval_cmp3

ex2_select_singval_cmp4.F90

program ex2_select_singval_cmp4
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP4
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using a
!   Chan-Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection or dqds algorithm for
!   singular values and a (Godunov) deflation method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Chan-Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and deflation
!   algorithms are used.
!   Subroutine SELECT_SINGVAL_CMP4 is faster than SELECT_SINGVAL_CMP3 for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c1_5, c50, safmin, bd_deflate2,  &
                         select_singval_cmp4, merror, allocate_error, norm, unit_matrix, ifirstloc
!
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=2000, ls=2000, max_qr_steps=4_i4b
!
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp4'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr, mnthr_nsing, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, ortho, gen_p, gen_q, reortho, two_stage, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE SVD WHEN REDUCING
!   a TO BIDIAGONAL FORM. IF n EXCEEDS mnthr ,
!   A QR FACTORIZATION IS USED FIRST TO REDUCE THE
!   n-BY-m MATRIX a TO A TRIANGULAR FORM.
!    
    mnthr = int( real( m, stnd )*c1_5, i4b )
!
    two_stage = n>=mnthr
!    two_stage = true
!    two_stage = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING LEFT SINGULAR VECTORS. IF n
!   EXCEEDS mnthr (E.G. A PRELIMINARY QR IS DONE) AND ls
!   EXCEEDS mnthr_nsing, THE ORTHOGONAL MATRIX q OF THE QR
!   FACTORIZATION IS GENERATED AND THE LEFT SINGULAR VECTORS
!   ARE COMPUTED BY A MATRIX MULTIPLICATION.
!
    gen_q = ls>=mnthr_nsing .and. two_stage
!    gen_q = true
!    gen_q = false
!
!   ALLOCATE WORK ARRAYS.
!
    if ( two_stage ) then
        if ( gen_q ) then
            allocate( a(n,m), ra(m,m), s(m), d(m),    &
                      e(m), p(m,m), stat=iok )
        else
            allocate( a(n,m), ra(m,m), s(m), d(m),    &
                      e(m), tauo(m), p(m,m), stat=iok )
        end if
    else
        allocate( a(n,m), s(m), d(m), e(m), &
                  p(m,m), stat=iok )
    end if
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp4.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp4 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp4 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ) IF BISECTION IS USED.
!
!   select_singval_cmp4 IS FASTER THAN select_singval_cmp3, BUT MAY BE LESS ACCURATE
!   FOR SOME MATRICES IF BISECTION IS USED.
!
    if ( two_stage ) then
!
!       STEP1A : REDUCE THE MATRIX a TO BIDIAGONAL FORM BY A TWO-STAGE ALGORITHM.
!       THE MATRIX a IS FIRST REDUCED TO TRIANGULAR FORM BY A QR FACTORIZATION
!       IN A FIRST STAGE. THE TRIANGULAR MATRIX IS REDUCED TO BIDIAGONAL FORM IN A
!       SECOND STAGE.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM IN THE TWO-STAGE 
!       ALGORITHM ARE STORED IN a, ra, tauo AND p.
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        if ( gen_q ) then
!
            call select_singval_cmp4( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e,  &
                                      ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                      dqds=dqds, failure_bd=failure_bd )
!
        else
!
            call select_singval_cmp4( a, ra, nsing, s, failure=failure1, sort=sort, d=d, e=e, &
                                      ls=ls, abstol=abstol, tauo=tauo, p=p, gen_p=gen_p,      &
                                      reortho=reortho, dqds=dqds, failure_bd=failure_bd )
!
        end if
!
    else
!
!       STEP1B : REDUCE THE MATRIX a TO BIDIAGONAL FORM WITHOUT A QR OR LQ FACTORIZATION.
!       THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!       THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!       FINALLY, THE SINGULAR VALUES ARE COMPUTED BY THE BISECTION OR DQDS ALGORITHM.
!
        call select_singval_cmp4( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                                  ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                                  dqds=dqds, failure_bd=failure_bd )
!
    end if
!
!   ON EXIT OF select_singval_cmp4 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   failure_bd= false :  INDICATES SUCCESSFUL EXIT IN THE BIDIAGONAL REDUCTION ALGORITHM
!   failure_bd= true  :  INDICATES THAT a IS NEARLY SINGULAR AND SOME LOSS OF ORTHOGONALITY
!                        CAN BE EXPECTED IN THE RALHA-BARLOW ONE-SIDED BIDIAGONALIZATION
!                        OF a FOR THE LAST LEFT SINGULAR VECTORS.
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp4.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        if ( two_stage ) then
!
            if ( gen_q ) then
!
               call bd_deflate2( a(:n,:m), ra(:m,:m), p(:m,:m),            &
                                 d(:m), e(:m), s(:nsing),                  &
                                 leftvec(:n,:nsing), rightvec(:m,:nsing),  &
                                 failure=failure2, ortho=ortho,            &
                                 max_qr_steps=max_qr_steps                 )
!
            else
!
                call bd_deflate2( a(:n,:m), ra(:m,:m), p(:m,:m),            &
                                  d(:m), e(:m), s(:nsing),                  &
                                  leftvec(:n,:nsing), rightvec(:m,:nsing),  &
                                  failure=failure2, tauo=tauo(:m),          &
                                  ortho=ortho, max_qr_steps=max_qr_steps    )
!
            end if
!
        else
!
            call bd_deflate2( a(:n,:m), p(:m,:m), d(:m), e(:m), s(:nsing),  &
                              leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                              failure=failure2, ortho=ortho,                &
                              max_qr_steps=max_qr_steps                     )
!
        end if
!
!       ON EXIT OF bd_deflate2 :
!
!           failure= false :  INDICATES SUCCESSFUL EXIT IN THE DEFLATION ALGORITHM
!           failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!           leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!           SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
    if ( two_stage ) then
!
        if ( gen_q ) then
            deallocate( ra )
        else
            deallocate( ra, tauo )
        end if
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp4() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp4() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_deflate2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp4
! ======================================
!
end program ex2_select_singval_cmp4

ex2_select_singval_cmp4_bis.F90

program ex2_select_singval_cmp4
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SELECT_SINGVAL_CMP4
!   in module SVD_Procedures for computing a full or partial SVD of a real n-by-m matrix using the
!   Rhala-Barlow one-sided bidiagonal reduction algorithm, a bisection algorithm for
!   singular values and a (Godunov) deflation method for singular vectors. The real matrix
!   must have more rows than columns.
!
!   The computations are parallelized if OpenMP is used and highly efficient variants of
!   the Rhala-Barlow one-sided bidiagonal reduction, bisection, dqds and deflation
!   algorithms are used.
!
!   Subroutine SELECT_SINGVAL_CMP4 is faster than SELECT_SINGVAL_CMP3 for computing singular
!   values, if bisection is used, but may be less accurate for some matrices.
!                                                                              
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures.
!                                                                              
! LATEST REVISION : 09/02/2024
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c0_9, c50, safmin, bd_deflate2,        &
                         select_singval_cmp4, merror, allocate_error, norm, unit_matrix, ifirstloc
!         
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX (HERE n>=m),
! ls IS THE TARGET RANK OF THE APPROXIMATE PARTIAL SVD, WHICH IS SOUGHT.
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP ls SINGULAR VECTORS
! IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=3000, m=3000, ls=3000, max_qr_steps=4_i4b
!
! tol IS A TOLERANCE FOR DETERMINING A CONSERVATIVE ESTIMATE OF THE RANK OF THE
! GENERATED MATRIX (E.G., A THRESHOLD FOR THE INVERSE OF THE CONDITION NUMBER OF
! THE TOP SINGULAR APPROXIMATION OF THE GENERATED MATRIX).
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50, tol=0.0001_stnd
!
    character(len=*), parameter :: name_proc='Example 2 of select_singval_cmp4'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, abstol, anorm, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, a2, p, leftvec, rightvec, resid, ra
    real(stnd), dimension(:),   allocatable :: s, d, e, tauo, resid2
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing, mnthr_nsing, nvec
!
    logical(lgl) :: failure1, failure2, failure_bd, ortho, gen_p, reortho, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : PARTIAL SVD OF A n-BY-m REAL MATRIX WITH n>=m USING THE RHALA-BARLOW
!               ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM, A DQDS OR BISECTION ALGORITHM FOR
!               SINGULAR VALUES AND THE GODUNOV DEFLATION TECHNIQUE FOR SINGULAR VECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHM.
!
!   FIRST DETERMINE HOW SINGULAR VALUES ARE COMPUTED. IF
!   dqds IS SET TO TRUE THE DQDS ALGORITHM IS USED
!   INSTEAD OF BISECTION AND ALL SINGULAR VALUES ARE COMPUTED.
!
    dqds = false
!
!   NEXT CHOOSE TUNING PARAMETERS FOR THE BISECTION ALGORITHM.
!
!   SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G., sqrt(safmin) ).
!
    abstol = sqrt( safmin )
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW BIDIAGONAL ALGORITHM.
!
    reortho = false
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   DETERMINE CROSSOVER POINT FOR THE BACK-TRANSFORMATION
!   ALGORITHM WHEN COMPUTING RIGHT SINGULAR VECTORS. IF
!   ls EXCEEDS mnthr_nsing, THE RIGHT ORTHOGONAL MATRIX p
!   OF THE ONE-SIDED BIDIAGONAL REDUCTION ALGORITHM IS GENERATED
!   AND BACK-TRANSFORMATION IS DONE BY A MATRIX MULTIPLICATION.
!
    mnthr_nsing = int( real( m, stnd )*c0_9, i4b )
!
    gen_p = ls>=mnthr_nsing
!    gen_p = true
!    gen_p = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(m), d(m), e(m), &
              p(m,m), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a.
!
    call random_number( a )
!
    if ( do_test ) then
!
!       COMPUTE THE FROBENIUS NORM OF THE RANDOM MATRIX.
!
        anorm = norm( a )
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,m), resid(n,m), resid2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE S IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, U IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   V IS AN m-BY-m ORTHOGONAL MATRIX. THE DIAGONAL ELEMENTS OF S
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE THE ls LARGEST SINGULAR VALUES OF a AND THE
!   ASSOCIATED LEFT AND RIGHT SINGULAR VECTORS OF a (E.G. A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS:
!
!   STEP1 : COMPUTE THE ls LARGEST SINGULAR VALUES OF a BY THE RHALA-BARLOW ONE-SIDED
!   BIDIAGONAL REDUCTION ALGORITHM APPLIED TO a, A BISECTION OR DQDS METHOD APPLIED TO THE
!   THE RESULTING BIDIAGONAL MATRIX AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE
!   select_singval_cmp4.
!
!   THE OPTIONAL ARGUMENT ls OF select_singval_cmp4 MAY BE USED TO DETERMINE HOW MANY
!   SINGULAR VALUES MUST BE COMPUTED IF BISECTION IS USED.
!
!   THE OPTIONAL ARGUMENT abstol OF select_singval_cmp4 MAY BE USED TO SET THE REQUIRED
!   PRECISION FOR THE SINGULAR VALUES IF BISECTION IS USED. A SINGULAR VALUE IS CONSIDERED TO
!   BE LOCATED IF IT HAS BEEN DETERMINED TO LIE IN AN INTERVAL WHOSE WIDTH IS EQUAL OR LESS
!   THAN abstol. SINGULAR VALUES WILL BE COMPUTED MOST ACCURATELY WHEN abstol IS SET
!   TO THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD (E.G. sqrt(safmin) ) IF BISECTION IS USED.
!
!   select_singval_cmp4 IS FASTER THAN select_singval_cmp3, BUT MAY BE LESS ACCURATE
!   FOR SOME MATRICES IF BISECTION IS USED.
!
    call select_singval_cmp4( a, nsing, s, failure=failure1, sort=sort, d=d, e=e,      &
                              ls=ls, abstol=abstol, p=p, gen_p=gen_p, reortho=reortho, &
                              dqds=dqds, failure_bd=failure_bd )
!
!   ON EXIT OF select_singval_cmp4 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT IN THE BISECTION OR DQDS ALGORITHM.
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE
!                     COMPUTATION OF THE SINGULAR VALUES OF a.
!                     THE SIGN OF THE INCORRECT SINGULAR VALUES IS SET TO NEGATIVE.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM ARE STORED IN a AND p .
!
!   THE SINGULAR VALUES ARE STORED IN s IN DECREASING ORDER (IF sort='d'). HERE, THIS IS
!   REQUIRED BY SUBROUTINE bd_deflate2 IN THE SECOND STEP.
!
!   nsing GIVES THE NUMBER OF SINGULAR VALUES REALLY COMPUTED BY select_singval_cmp4.
!   nsing MAY BE GREATER THAN ls IF CLUSTERS OF SINGULAR VALUES ARE PRESENT OR IF
!   DQDS IS USED.
!   THE nsing LARGEST SINGULAR VALUES ARE STORED IN THE FIRST nsing POSITIONS OF s
!   IN DECREASING ORDER IF sort='d'.
!
!   CORRECT nsing IF DQDS IS USED.
!
    if ( dqds ) then
        nsing = ls
    end if
!
!   STEP2 : COMPUTE nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   APPLIED TO THE INTERMEDIATE BIDIAGONAL MATRIX AND BACK-TRANSFORMATION WITH SUBROUTINE
!   bd_deflate2.
!
    if ( nsing>0 ) then
!
!       ALLOCATE WORK VARIABLES NEEDED TO STORE THE SINGULAR VECTORS.
!
        allocate( leftvec(n,nsing),    &
                  rightvec(m,nsing),   &
                  stat = iok    )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY 
!       A DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE BIDIAGONAL MATRIX
!       d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2 .
!
!       ON ENTRY OF bd_deflate2: PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL
!       MATRIX (OR OF a). THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!       OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!       COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!       (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!       SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
!       OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!       DEFLATING A BIDIAGONAL MATRIX FOR A GIVEN SINGULAR VALUE IN THE DEFLATION ALGORITHM.
!       THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL SINGULAR VALUES
!       EXCEEDS max_qr_steps * nsing.
!
        call bd_deflate2( a(:n,:m), p(:m,:m), d(:m), e(:m), s(:nsing),  &
                          leftvec(:n,:nsing), rightvec(:m,:nsing),      &
                          failure=failure2, ortho=ortho,                &
                          max_qr_steps=max_qr_steps                     )
!
!       ON EXIT OF bd_deflate2 :
!
!           failure= false :  INDICATES SUCCESSFUL EXIT IN THE DEFLATION ALGORITHM
!           failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE
!                             DEFLATION ALGORITHM.

!           leftvec AND rightvec CONTAIN, RESPECTIVELY, THE FIRST nsing LEFT AND RIGHT
!           SINGULAR VECTORS OF a .
!
!       bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!       IDENTICAL FOR SOME PATHOLOGICAL MATRICES.
!
    end if
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test .and. nsing>0 ) then
!
!       DETERMINE THE EFFECTIVE RANK OF a .
!
        tmp = max( tol*s(1_i4b), safmin )
!
        nvec = ifirstloc( logical( s(:nsing)<=tmp, lgl ) ) - 1_i4b
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*v(:m,:nvec) - u(:n,:nvec)*s(:nvec,:nvec),
!       WHERE u AND v ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nvec) = matmul(a2,rightvec(:m,:nvec)) - leftvec(:n,:nvec)*spread(s(:nvec),dim=1,ncopies=n)
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 = maxval( resid2(:nvec) )/( anorm*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u(:n,:nvec)**(t)*u(:n,:nvec).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nvec,:nvec) = abs( a2(:nvec,:nvec) - matmul( transpose(leftvec(:n,:nvec)), leftvec(:n,:nvec) ) )
!
        err2 = maxval( resid(:nvec,:nvec) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - v(:m,:nsing)**(t)*v(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
!
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    if ( do_test ) then
!
        deallocate( a2, resid, resid2 )
!
    end if
!
    if ( nsing>0 ) then
!
        deallocate( a, s, d, e, p, leftvec, rightvec )
!
    else
!
        deallocate( a, s, d, e, p )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from select_singval_cmp4() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from select_singval_cmp4() ) = ', failure1
    write (prtunit,*) ' FAILURE    ( from bd_deflate2() )         = ', failure2
!
    if ( do_test .and. nsing>0 ) then
!
        write (prtunit,*) 
        write (prtunit,*) 'Number of requested singular triplets                = ', ls
        write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
        write (prtunit,*) 'Number of safe singular triplets                     = ', nvec
!
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
!
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', nsing, ' singular values and vectors of a ', &
       n, ' by ', m,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_select_singval_cmp4
! ======================================
!
end program ex2_select_singval_cmp4

ex2_solve_lin.F90

program ex2_solve_lin
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of fonction SOLVE_LIN
!   in module Lin_Procedures for solving a real linear system with several right
!   hand sides.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, true, false, zero, half, safmin, solve_lin, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE LINEAR SYSTEM
! AND nrhs IS THE NUMBER OF RIGHT HAND SIDES.
!
    integer(i4b), parameter :: prtunit=6, n=4000, nrhs=4000
!
    character(len=*), parameter :: name_proc='Example 2 of solve_lin'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, eps, tmp, elapsed_time
    real(stnd), dimension(:,:), allocatable :: a, b, x, x2, res
!
    integer :: iok, istart, iend, irate
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR SYSTEM WITH A REAL n-BY-n MATRIX
!               AND SEVERAL RIGHT HAND-SIDES WITH THE LU DECOMPOSITION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = real( n, stnd)*sqrt( epsilon( err ) )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), b(n,nrhs), x(n,nrhs), x2(n,nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    a = a - half
!
    call random_number( tmp )
!
    if ( tmp>safmin ) then
        a = a/tmp
    end if
!
!   GENERATE A n-by-nrhs RANDOM SOLUTION MATRIX x .
!
    call random_number( x )
!
!   COMPUTE THE MATRIX-MATRIX PRODUCT b = a*x .
!
    b = matmul( a, x )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count=istart, count_rate=irate )
!
!   COMPUTE THE SOLUTION MATRIX FOR LINEAR SYSTEM
!
!            a*x = b
!
!   BY COMPUTING THE LU DECOMPOSITION WITH PARTIAL PIVOTING AND
!   IMPLICIT ROW SCALING OF MATRIX a WITH FUNCTION solve_lin.
!   ARGUMENTS a AND b ARE NOT MODIFIED BY THE FUNCTION.
!
    x2 = solve_lin( a, b )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    elapsed_time = real( iend - istart, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(n,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THE RESULTS FOR SMALL RESIDUALS.
!
        res(:n,:nrhs) = x2(:n,:nrhs) - x(:n,:nrhs)
        err = maxval( sum( abs(res), dim=1 ) /    &
                      sum(abs(x),    dim=1 )      )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, x2 )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the solutions of a linear real system of size ', &
       n, ' with', nrhs,' right hand sides is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_solve_lin
! ============================
!
end program ex2_solve_lin

ex2_solve_llsq.F90

program ex2_solve_llsq
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of fonction SOLVE_LLSQ
!   in module LLSQ_Procedures for solving a linear least squares problem
!   with several right hand sides.
!
!
! LATEST REVISION : 09/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c500, allocate_error, &
                         merror, solve_llsq
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=2000, nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c500
!
    character(len=*), parameter :: name_proc='Example 2 of solve_llsq'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, err, elapsed_time
    real(stnd), allocatable, dimension(:,:) :: a, x, res, b
!
    integer(i4b) :: krank
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, min_norm
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SOLVING A LINEAR LEAST SQUARES SYSTEM WITH SEVERAL RIGHT HAND-SIDES:
!
!                              a(:m,:n)*x(:n,:nrhs) ≈ b(:m,:nrhs)
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = real( m, stnd)*epsilon( err )
    eps = fudge*ulp
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   CHOOSE TUNING PARAMETERS FOR THE LLSQ ALGORITHM.
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED.
!
    krank = 0
!
!   DECIDE IF THE MINIMUN 2-NORM SOLUTION(S) MUST BE COMPUTED.
!
    min_norm = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m,nrhs), x(n,nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SOLUTION MATRIX x FOR LINEAR LEAST SQUARES PROBLEM
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENT
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED.
!
    x(:n,:nrhs) = solve_llsq( a(:m,:n), b(:m,:nrhs), krank=krank, tol=tol, min_norm=min_norm )
!
!   ON INPUT, IF THE OPTIONAL PARAMETER krank IS SPECIFIED AND IF krank=k,
!   THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED INTO THE BASIS.
!   PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, SPECIFIC CALCULATIONS TO DETERMINE THE EFFECTIVE RANK a
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS THEN DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IN THE 1-NORM IS LESS THAN 1/tol.
!       IF tol=0 IS SPECIFIED THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE EFFECTIVE RANK OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING krank=0  AND tol=RELATIVE PRECISION OF THE ELEMENTS
!   IN a. IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD
!   BE USED. ALSO, IT MAY BE HELPFUL TO SCALE THE COLUMNS OF a SO THAT ALL ELEMENTS 
!   ARE ABOUT THE SAME ORDER OF MAGNITUDE.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL LOGICAL PARAMETER min_norm IS
!   PRESENT AND IS SET TO true . OTHERWISE, SOLUTION(S) ARE COMPUTED SUCH THAT IF THE jTH COLUMN OF a
!   IS OMITTED FROM THE BASIS, x[j] OR x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( res(m,nrhs), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF COEFFICIENT MATRIX a .
!
        res(:m,:nrhs) = b(:m,:nrhs) - matmul( a(:m,:n), x(:n,:nrhs) )
!
        err = maxval( abs( matmul( transpose( res(:m,:nrhs) ), a(:m,:n) ) ) )/ sum( abs(a(:m,:n)) )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( res )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x )
!
!   PRINT THE RESULT OF THE TEST.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (*,'(a,i6,a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' by ', n,' real coefficient matrix and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_solve_llsq
! =============================
!
end program ex2_solve_llsq

ex2_svd_cmp.F90

program ex2_svd_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP
!   in module SVD_Procedures for computing all singular values of a real n-by-m matrix
!   by a fast variant of the Golub-Reinsch bidiagonal reduction algorithm and, at the user
!   option, the bisection, dqds or QR method with implicit shift methods for computing the                                       
!   singular values of the bidiagonal form of the real n-by-m matrix.
!
!
! Further Details
! ===============
!
!   The program also shows the use of subroutine BD_INVITER2 in module SVD_Procedures
!   for computing the leading singular vectors of a real matrix and a partial SVD
!   of a real matrix by inverse iterations.
!
!
! LATEST REVISION : 08/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, true, false, bd_inviter2, svd_cmp, &
                         norm, unit_matrix, c50, c100, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP SINGULAR VECTORS,
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=1000, mn=min(m,n), maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of svd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               min_explnorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, tauq, taup
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing
!
    logical(lgl) :: dqds, bisect, failure1, failure2, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : ALL SINGULAR VALUES AND nsing SINGULAR VECTORS OF A n-BY-m REAL MATRIX
!               BY THE INVERSE ITERATION METHOD (E.G. PARTIAL SVD DECOMPOSITION). nsing
!               IS DETERMINED AS THE NUMBER OF SINGULAR TRIPLETS WHICH DESCRIBED AT LEAST
!               90% OF THE FROBENIUS NORM OF THE REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   COMPUTE THE NUMBER OF SINGULAR TRIPLETS SUCH THAT THEY DESCRIBE AT LEAST 90%
!   OF THE SQUARED NORM OF THE REAL MATRIX.
!
    min_explnorm = 90._stnd
!
!   DETERMINE HOW SINGULAR VALUES ARE COMPUTED BY DQDS, BISECTION
!   OR THE BIDIAGONAL IMPLICIT QR ALGORITHM.
!
    bisect = true
!
    dqds = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(n,m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   v IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF a AND
!   ONLY nsing LEFT AND RIGHT SINGULAR VECTORS OF a (EG A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS. nsing IS DETERMINED AS THE NUMBER OF SINGULAR TRIPLETS WHICH DESCRIBED
!   AT LEAST 90% OF THE FROBENIUS NORM OF a.
!
!   SEE EXAMPLE ex1_svd_cmp.f90, IF YOU WANT TO COMPUTE A FULL SVD OF a .  
!   THIS PROGRAM SHOWS HOW TO COMPUTE A PARTIAL SVD OF a WITH SUBROUTINES
!   svd_cmp AND bd_inviter2.
!
!   STEP1 : COMPUTE ALL SINGULAR VALUES OF a AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE svd_cmp.
!
    call svd_cmp( a, s, failure=failure1, sort=sort, bisect=bisect, dqds=dqds, &
                  d=d, e=e, tauq=tauq, taup=taup  )
!
!   ON EXIT OF svd_cmp :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT;
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                     SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a .
!
!   s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   HERE, SORT = 'd' IS USED THIS IS REQUIRED FOR THE USE OF bd_inviter2.
!                
!   IF THE PARAMETER v IS ABSENT, svd_cmp COMPUTES ONLY THE SINGULAR VALUES OF a             
!   AND, OPTIONALLY, STORES THE INTERMEDIATE BIDIAGONAL FORM OF a AND THE ORTHOGONAL
!   MATRICES USED TO REDUCE a TO BIDIAGONAL FORM.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN THE OPTIONAL ARGUMENTS d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e ARE STORED 
!   IN mat, tauq, taup, WHEN THE OPTIONAL ARGUMENTS tauq AND taup ARE PRESENT.
!
!   NOW SELECT THE NUMBER OF SINGULAR TRIPLETS SUCH THAT THEY DESCRIBE AT LEAST
!   min_explnorm OF THE SQUARED NORM OF THE REAL MATRIX a .
!
    tmp  = zero
    tmp2 = c100/sum( s(:mn)**2 )
!
    do nsing= 1_i4b, mn
!
        tmp = tmp + tmp2*s(nsing)**2
        if ( tmp>=min_explnorm ) exit
!
    end do
!
!   CHECK THE nsing VALUE.
!
    nsing = min( nsing, mn )
!
!   ALLOCATE WORK ARRAYS FOR THE SINGULAR VECTORS.
!
    allocate( leftvec(n,nsing), rightvec(m,nsing), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   STEP2 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY maxiter
!   INVERSE ITERATIONS ON THE INTERMEDIATE BIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_inviter2 .
!
    call bd_inviter2( a, tauq, taup, d, e, s(:nsing), leftvec, rightvec, failure=failure2, maxiter=maxiter )
!
!   ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!   THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   ON EXIT OF bd_inviter2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT.
!   failure= true  :  INDICATES THAT THE ALGORITHM FAILS TO CONVERGE.
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e ARE COMPUTED USING maxiter INVERSE ITERATIONS
!   FOR THE nsing SINGULAR VALUES AT ONE STEP. THE SINGULAR VECTORS OF d_e ARE THEN ORTHOGONALIZED BY 
!   THE MODIFIED GRAM-SCHMIDT ALGORITHM IF NECESSARY. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED 
!   BY BACK TRANSFORMATIONS. ON EXIT, leftvec AND rightvec CONTAIN THE FIRST nsing LEFT AND RIGHT
!   SINGULAR VECTORS OF a, RESPECTIVELY.
!
!   NOTE THAT bd_inviter2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( a2(:nsing,1_i4b) )/ ( sum( abs(s(:mn)) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid, leftvec, rightvec, s, d, e, tauq, taup )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from svd_cmp() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_inviter2() ) = ', failure2
!
    write (prtunit,*) 
    write (prtunit,*) 'Explained squared norm (%)                           = ', tmp
    write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a', &
       n, ' by', m,' real matrix is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_svd_cmp
! ==========================
!
end program ex2_svd_cmp

ex2_svd_cmp2.F90

program ex2_svd_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP
!   in module SVD_Procedures for computing all singular values of a real n-by-m matrix
!   by a fast variant of the Golub-Reinsch bidiagonal reduction algorithm and, at the user
!   option, the bisection, dqds or QR method with implicit shift methods for computing the                                       
!   singular values of the bidiagonal form of the real n-by-m matrix.
!
!
! Further Details
! ===============
!
!   The program also shows the use of subroutine BD_DEFLATE2 in module SVD_Procedures
!   for computing the leading singular vectors of a real matrix and a partial SVD
!   of a real matrix by a delfation technique.
!
!
! LATEST REVISION : 08/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, zero, true, false, bd_deflate2, svd_cmp2, &
                         norm, unit_matrix, c50, c100, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n AND m ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! max_qr_steps IS THE MAXIMUM NUMBER OF QR STEPS PERFORMED TO COMPUTE THE TOP SINGULAR
! VECTORS IN THE DEFLATION ALGORITHM.
!
    integer(i4b), parameter :: prtunit=6, n=1000, m=1000, mn=min(m,n), max_qr_steps=3_i4b
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of svd_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err, err1, err2, err3, eps, tmp, tmp2, &
                                               min_explnorm, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, d, e, tauq, taup
    real(stnd), allocatable, dimension(:,:) :: a, a2, leftvec, rightvec, resid
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: nsing
!
    logical(lgl) :: dqds, bisect, failure1, failure2, ortho, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : ALL SINGULAR VALUES AND nsing SINGULAR VECTORS OF A n-BY-m REAL MATRIX
!               BY A DEFLATION METHOD (E.G., PARTIAL SVD DECOMPOSITION).
!               nsing IS DETERMINED AS THE NUMBER OF SINGULAR TRIPLETS WHICH DESCRIBED
!               AT LEAST 90% OF THE FROBENIUS NORM OF THE REAL MATRIX.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   COMPUTE THE NUMBER OF SINGULAR TRIPLETS SUCH THAT THEY DESCRIBE AT LEAST 90%
!   OF THE SQUARED NORM OF THE REAL MATRIX.
!
    min_explnorm = 90._stnd
!
!   SPECIFY IF BISECTION OR DQDS IS USED FOR COMPUTING THE SINGULAR VALUES
!   INSTEAD OF THE BIDIAGONAL IMPLICIT QR METHOD.
!
    bisect = false
!
    dqds = true
!
!   CHOOSE TUNING PARAMETERS FOR THE DEFLATION ALGORITHM.
!
!   DETERMINE IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED SINGULAR VECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF SINGULAR VALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE SINGULAR VECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,m), s(mn), d(mn), e(mn), tauq(mn), taup(mn), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A n-BY-m RANDOM REAL MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(n,m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       MAKE A COPY OF THE RANDOM DATA MATRIX .
!
        a2(:n,:m) = a(:n,:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL n-BY-m MATRIX a
!   IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN n-BY-m MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN n-BY-n ORTHOGONAL MATRIX, AND
!   v IS AN m-BY-m ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   THIS PROGRAM ILLUSTRATES HOW TO COMPUTE ALL THE SINGULAR VALUES OF a AND
!   ONLY nsing LEFT AND RIGHT SINGULAR VECTORS OF a (EG A PARTIAL SVD DECOMPOSITION OF a)
!   IN TWO STEPS. nsing IS DETERMINED AS THE NUMBER OF SINGULAR TRIPLETS WHICH DESCRIBED
!   AT LEAST 90% OF THE FROBENIUS NORM OF a.
!
!   SEE EXAMPLE ex1_svd_cmp2.f90, IF YOU WANT TO COMPUTE A FULL SVD OF a.  
!   THIS PROGRAM SHOWS HOW TO COMPUTE A PARTIAL SVD OF a WITH SUBROUTINES
!   svd_cmp2 AND bd_deflate2.
!
!   STEP1 : COMPUTE ALL SINGULAR VALUES OF a AND STORE INTERMEDIATE RESULTS WITH SUBROUTINE svd_cmp2.
!
    call svd_cmp2( a, s, failure=failure1, sort=sort, bisect=bisect, dqds=dqds, &
                   d=d, e=e, tauq=tauq, taup=taup  )
!
!   ON EXIT OF svd_cmp2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT
!   failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                     THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                     SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!   s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!   HERE, SORT = 'd' IS USED THIS IS REQUIRED FOR THE USE OF bd_deflate2 .
!                
!   IF THE PARAMETER u_vt IS ABSENT, svd_cmp2 COMPUTES ONLY THE SINGULAR VALUES OF a             
!   AND, OPTIONALLY, STORES THE INTERMEDIATE BIDIAGONAL FORM OF a AND THE ORTHOGONAL
!   MATRICES USED TO REDUCE a TO BIDIAGONAL FORM.
!
!   THE INTERMEDIATE BIDIAGONAL FORM OF a IS STORED IN THE OPTIONAL ARGUMENTS d AND e.
!   THE ORTHOGONAL MATRICES USED TO REDUCE a TO BIDIAGONAL FORM d_e ARE STORED 
!   IN mat, tauq, taup, WHEN THE OPTIONAL ARGUMENTS tauq AND taup ARE PRESENT.
!
!   NOW SELECT THE NUMBER OF SINGULAR TRIPLETS SUCH THAT THEY DESCRIBE AT LEAST 90%
!   OF THE NORM OF THE REAL MATRIX a.
!
    tmp  = zero
    tmp2 = c100/sum( s(:mn)**2 )
!
    do nsing= 1_i4b, mn
!
        tmp = tmp + tmp2*s(nsing)**2
        if ( tmp>=min_explnorm ) exit
!
    end do
!
!   CHECK THE nsing VALUE.
!
    nsing = min( nsing, mn )
!
!   ALLOCATE WORK ARRAYS FOR THE SINGULAR VECTORS.
!
    allocate( leftvec(n,nsing), rightvec(m,nsing), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   STEP2 : COMPUTE THE FIRST nsing LEFT AND RIGHT SINGULAR VECTORS OF a BY A DEFLATION TECHNIQUE
!   ON THE INTERMEDIATE BIDIAGONAL MATRIX d_e AND BACK-TRANSFORMATION WITH SUBROUTINE bd_deflate2.
!
    call bd_deflate2( a(:n,:m), tauq(:mn), taup(:mn), d(:mn), e(:mn), s(:nsing),   &
                      leftvec(:n,:nsing), rightvec(:m,:nsing), failure=failure2,   &
                      ortho=ortho, max_qr_steps=max_qr_steps                       )
!
!   ON ENTRY, PARAMETER s CONTAINS SELECTED SINGULAR VALUES OF THE BIDIAGONAL MATRIX d_e (OR a).
!   THE SINGULAR VALUES MUST BE GIVEN IN DECREASING ORDER.
!
!   ON EXIT OF bd_deflate2 :
!
!   failure= false :  INDICATES SUCCESSFUL EXIT.
!   failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE DEFLATION ALGORITHM.
!
!   THE SINGULAR VECTORS OF THE BIDIAGONAL FORM d_e CORRESPONDING TO THE nsing SINGULAR VALUES ARE
!   COMPUTED USING A DEFLATION TECHNIQUE. THE SINGULAR VECTORS OF a ARE FINALLY COMPUTED BY BACK
!   TRANSFORMATIONS.
!   ON EXIT, leftvec AND rightvec CONTAIN, RESPECTIVELY, THE nsing LEFT AND RIGHT SINGULAR VECTORS OF a,
!   ASSOCIATED WITH THE SINGULAR VALUES SPECIFIED IN s(:nsing).

!   bd_deflate2 MAY FAIL IF SOME THE SINGULAR VALUES SPECIFIED IN PARAMETER s ARE NEARLY
!   IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( resid(n,nsing), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*V(:m,:nsing) - U(:n,:nsing)*S(:nsing,:nsing),
!       WHERE leftvec AND rightvec ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
        resid(:n,:nsing) = matmul(a2,rightvec) - leftvec*spread(s(:nsing),dim=1,ncopies=n)
        a2(:nsing,1_i4b) = norm( resid(:n,:nsing), dim=2_i4b )
        err1 = maxval( a2(:nsing,1_i4b) )/ ( sum( abs(s(:mn)) )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U(:n,:nsing)**(t)*U(:n,:nsing).
!
        call unit_matrix( a2(:nsing,:nsing) )
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(leftvec), leftvec ) )
        err2 = maxval( resid(:nsing,:nsing) )/real(n,stnd)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - V(:m,:nsing)**(t)*V(:m,:nsing).
!
        resid(:nsing,:nsing) = abs( a2(:nsing,:nsing) - matmul( transpose(rightvec), rightvec ) )
        err3 = maxval( resid(:nsing,:nsing) )/real(m,stnd)
!
        err = max( err1, err2, err3 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid, leftvec, rightvec, s, d, e, tauq, taup )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, leftvec, rightvec, s, d, e, tauq, taup )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from svd_cmp() )     = ', failure1
    write (prtunit,*) ' FAILURE ( from bd_deflate2() ) = ', failure2
!
    write (prtunit,*) 
    write (prtunit,*) 'Explained squared norm (%)                           = ', tmp
    write (prtunit,*) 'Number of computed singular triplets                 = ', nsing
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed singular triplets           = ', err1
        write (prtunit,*) 'Orthogonality of the computed left singular vectors  = ', err2
        write (prtunit,*) 'Orthogonality of the computed right singular vectors = ', err3
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values and ', nsing, ' singular vectors of a', &
       n, ' by', m,' real matrix is', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_svd_cmp2
! ===========================
!
end program ex2_svd_cmp2

ex2_svd_cmp3.F90

program ex2_svd_cmp3
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP3
!   in module SVD_Procedures for computing all singular values of a real m-by-n matrix
!   by the Rhala-Barlow one-sided bidiagonal reduction algorithm and, at the user
!   option, the bisection, dqds or QR method with implicit shift methods for computing the
!   singular values of the bidiagonal form of the real m-by-n matrix.
!
!
! LATEST REVISION : 08/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, &
                         svd_cmp3, random_seed_, random_number_, gen_random_mat,    &
                         singval_sort, merror, allocate_error
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 2 of svd_cmp3'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: tmp, tmp2, abs_err, rel_err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0, scal
    real(stnd), allocatable, dimension(:,:) :: a
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, failure_bd, reortho, dqds, bisect, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : SINGULAR VALUES OF A FULL SVD OF A REAL MATRIX USING THE
!               ONE-SIDED RALHA-BARLOW BIDIAGONAL REDUCTION ALGORITHM AND
!               THE BIDIAGONAL QR IMPLICIT, DQDS OR BISECTION METHOD FOR
!               COMPUTING THE SINGULAR VALUES.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 7_i4b
!
!   CHOOSE TUNING PARAMETERS FOR THE ALGORITHMS.
!
!   SPECIFY IF REORTHOGONALIZATION IS PERFORMED IN
!   THE ONE-SIDED RHALA-BARLOW ALGORITHM.
!
    reortho = false
!
!   SPECIFY IF BISECTION OR DQDS IS USED FOR COMPUTING THE SINGULAR VALUES
!   INSTEAD OF THE QR IMPLICIT METHOD. BISECTION IS USUALLY MORE ACCURATE
!   BUT SLOWER.
!
    bisect = false
!
    dqds = true
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
    end if
!
    if ( do_test .and. mat_type>0_i4b ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( s0(k), scal(k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE TRUE SINGULAR VALUES.
!
        s0(:nsvd0) = s(:nsvd0)
        s0(nsvd0+1_i4b:k) = zero
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp3 COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   v IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   HERE, svd_cmp3 IS USED TO COMPUTE ONLY THE SINGULAR VALUES OF THE INPUT MATRIX.
!
    call svd_cmp3( a, s, failure, sort=sort, bisect=bisect, dqds=dqds, &
                   reortho=reortho, failure_bd=failure_bd )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES IN ARGUMENT s.
!
!   ON EXIT OF svd_cmp3 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a.
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test .and. mat_type>0_i4b ) then
!
!       COMPUTE ABSOLUTE ERRORS OF SINGULAR VALUES.
!
        abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!       COMPUTE RELATIVE ERRORS OF SINGULAR VALUES.
!
        where( s0(:k)/=zero )
            scal(:k) = s0(:k)
        elsewhere
            scal(:k) = one
        end where
!
        rel_err = maxval( abs( (s(:k) - s0(:k))/scal(:k) ) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( s0, scal )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test .and. mat_type>0_i4b  ) then
!
        write (prtunit,*) 
!
        write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
        write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE_BD ( from svd_cmp3() ) = ', failure_bd
    write (prtunit,*) ' FAILURE    ( from svd_cmp3() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_svd_cmp3
! ===========================
!
end program ex2_svd_cmp3

ex2_symtrid_bisect.F90

program ex2_symtrid_bisect
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_BISECT
!   in module Eig_Procedures for computing selected or all eigenvalues of a real
!   symmetric tridiagonal matrix by a bisection method.                                                                  
!                                                     
!                                                     
! Further Details
! ===============
!
!    The program also shows the use of subroutines SYMTRID_CMP and TRID_DEFLATE
!    in module EIG_Procedures for reducing a real symmetric matrix to tridiagonal
!    form and computing selected eigenvectors of a real symmetric (tridiagonal) matrix
!    by a deflation method.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, safmin, symtrid_cmp,  &
                         symtrid_bisect, trid_deflate, norm, unit_matrix, merror,      &
                         allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVALUES/EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of symtrid_bisect'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, abstol, elapsed_time
    real(stnd), allocatable, dimension(:)   :: eigval, resid2, d, e
    real(stnd), allocatable, dimension(:,:) :: a, eigvec, a2
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: max_qr_steps, neig2
!
    logical(lgl)  :: failure1, failure2, do_test, ortho
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : FIRST neig EIGENVALUES AND, OPTIONALLY, EIGENVECTORS OF
!               A REAL SYMMETRIC MATRIX USING BISECTION AND A DEFLATION METHOD
!               FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   DETERMINE PARAMETERS OF THE DEFLATION ALGORITHM.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED EIGENVECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF EIGENVALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE EIGENVECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A TRIDIAGONAL MATRIX FOR A GIVEN EIGENVALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL REQUESTED
!   EIGENVALUES EXCEEDS max_qr_steps * neig.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(n,n), eigvec(n,neig), eigval(n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM DATA MATRIX AND FROM IT
!   A SELF-ADJOINT MATRIX a .
!
    call random_number( a )
!
    a = a + transpose( a ) 
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(n,n), resid2(neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE RANDOM SELF-ADJOINT MATRIX a .
!
        a2(:n,:n) = a(:n,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST, REDUCE TO TRIDIAGONAL FORM THE SELF-ADJOINT MATRIX a AND SAVE
!   THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETERS d AND e WITH
!   SUBROUTINE symtrid_cmp.
!
    call symtrid_cmp( a, d, e, store_q=true )
!
!   ON EXIT OF symtrid_cmp, THE UPPER TRIANGLE OF a IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS
!   USED TO REDUCE a TO TRIDIAGONAL FORM IF THE PARAMETER store_q IS PRESENT AND SET TO true, OTHERWISE
!   THE UPPER TRIANGLE OF a IS DESTROYED AND THE HOUSEHOLDER TRANSFORMATIONS ARE NOT SAVED.
!   PARAMETERS d AND e STORE, RESPECTIVELY, THE DIAGONAL AND THE OFF-DIAGONAL ELEMENTS OF THE
!   INTERMEDIATE TRIDIAGONAL FORM OF a .
!
!   NEXT, COMPUTE neig EIGENVALUES OF THE INTERMEDIATE TRIDIAGONAL MATRIX BY A BISECTION METHOD
!   WITH HIGH ACCURACY. THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER
!   abstol IS SET TO sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    call symtrid_bisect( d, e, neig2, eigval, failure=failure1, sort=sort, le=neig, abstol=abstol )
!
!   ON EXIT OF symtrid_bisect:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BISECTION ALGORITHM. 
!                         APPLIED TO THE INTERMEDIATE TRIDIAGONAL FORM OF a .
!
!       eigval IS OVERWRITTEN WITH THE REQUESTED EIGENVALUES OF a.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   FINALLY, COMPUTE THE FIRST neig EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY A
!   DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION.
!
!   ON ENTRY OF SUBROUTINE trid_deflate, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_deflate( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure=failure2,     &
                       mat=a, ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF trid_deflate :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE DEFLATION
!                         ALGORITHM.
!
!       eigvec CONTAINS THE neig EIGENVECTORS OF a ASSOCIATED WITH THE EIGENVALUES eigval(:neig).
!
!   trid_deflate MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL AND IF THESE EIGENVALUES ARE NOT COMPUTED WITH HIGH ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        a(:n,:neig) = matmul( a2, eigvec ) - eigvec*spread( eigval(:neig), 1, n )
        resid2(:neig) = norm( a(:n,:neig), dim=2_i4b )
        err1 =  maxval( resid2(:neig) )/( norm( a2 )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:neig,:neig) )
!
        a2(:neig,:neig) = abs( a(:neig,:neig) - matmul( transpose( eigvec ), eigvec ) )
        err2 = maxval(a2(:neig,:neig))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, eigvec, eigval, d, e, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, eigvec, eigval, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure1
    write (prtunit,*) ' FAILURE ( from trid_deflate()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig, ' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_symtrid_bisect
! =================================
!
end program ex2_symtrid_bisect

ex2_symtrid_cmp.F90

program ex2_symtrid_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines SYMTRID_CMP and
!   ORTHO_GEN_SYMTRID in module EIG_Procedures for computing a tridiagonal reduction
!   of a real symmetric matrix stored in packed form.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, symtrid_cmp,    &
                         ortho_gen_symtrid, triangle, norm, unit_matrix,         &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIC MATRIX.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=n*(n+1)/2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of symtrid_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, a_packed
    real(stnd), allocatable, dimension(:,:) :: a, q, resid, trid
!
    integer(i4b) :: l
!
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : TRIDIAGONAL REDUCTION OF A REAL SYMMETRIC
!   MATRIX STORED IN PACKED FORM.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a_packed(p), q(n,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM SELF-ADJOINT MATRIX a IN PACKED FORM.
!
    call random_number( a_packed(:p) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       UNPACK AND SAVE RANDOM SELF-ADJOINT MATRIX a .
!
        a(:n,:n) = unpack( a_packed(:p), mask=triangle( true, n, n, extra=1_i4b), field=zero )
!
        do l = 1_i4b, n-1_i4b
             a(l+1_i4b:n,l) = a(l,l+1_i4b:n)
        end do
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL symtrid_cmp AND ortho_gen_symtrid_cmp TO REDUCE THE MATRIX a
!   IN PACKED FORM TO TRIDIAGONAL FORM
!
!                      a = Q*TRID*Q**(t)
!
!   WHERE Q IS ORTHOGONAL AND TRID IS A SYMMETRIC TRIDIAGONAL MATRIX.
!
!   ON ENTRY OF symtrid_cmp, a_packed MUST CONTAINS THE LEADING n-BY-n UPPER TRIANGULAR PART
!   OF THE MATRIX TO BE REDUCED IN PACKED FORMAT.
!
    call symtrid_cmp( a_packed(:p), d, e, store_q=true )
!
!   ON EXIT OF symtrid_cmp:
!
!         ARGUMENTS d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND
!         OFF-DIAGONAL ELEMENTS OF THE TRIDIAGONAL MATRIX TRID.
!
!         IF THE OPTIONAL ARGUMENT store_q IS PRESENT AND SET TO TRUE,
!         THE LINEAR ARRAY a_packed IS OVERWRITTEN BY THE MATRIX Q
!         AS A PRODUCT OF ELEMENTARY REFLECTORS.
!
!   UNPACKED THE MATRIX IN ORDER TO GENERATE Q BY A CALL TO ortho_gen_symtrid.
!
    q(:n,:n) = unpack( a_packed(:p), mask=triangle( true, n, n, extra=1_i4b), field=zero )
!
!   ortho_gen_symtrid GENERATES THE MATRIX Q STORED AS A PRODUCT OF
!   ELEMENTARY REFLECTORS AFTER A CALL TO symtrid_cmp WITH store_q=true.
!
    call ortho_gen_symtrid( q )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( trid(n,n), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*Q - Q*TRID .
!
        trid(:n,:n) = zero
!
        do l = 1_i4b, n-1_i4b
            trid(l,l)       = d(l)
            trid(l,l+1_i4b) = e(l)
            trid(l+1_i4b,l) = e(l)
        end do
!
        trid(n,n) = d(n)
!
        resid(:n,:n) = matmul( a(:n,:n), q(:n,:n)  )           &
                       - matmul( q(:n,:n), trid(:n,:n) )
!
        trid(:n,1_i4b) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( trid(:n,1_i4b) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - Q**(t)*Q.
!
        call unit_matrix( a(:n,:n) )
!
        resid(:n,:n) = abs( a(:n,:n) - matmul( transpose(q(:n,:n )), q(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, q, d, e, a, trid, resid )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, q, d, e )
!
    endif
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed tridiagonal decomposition a = Q*TRD*Q**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed Q orthogonal matrix                   = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the tridiagonal reduction of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_symtrid_cmp
! ==============================
!
end program ex2_symtrid_cmp

ex2_symtrid_cmp2.F90

program ex2_symtrid_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines SYMTRID_CMP2,
!   ORTHO_GEN_SYMTRID and SYMTRID_QRI2 in module EIG_Procedures for computing a
!   tridiagonal reduction of a real cross-product matrix using the Rhala one-sided
!   method and an eigen decomposition of this cross-product matrix.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, symtrid_cmp2, &
                         ortho_gen_symtrid, symtrid_qri2, norm, unit_matrix,   &
                         merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE MATRIX USED
! TO COMPUTE THE MATRIX CROSS-PRODUCT, m MUST BE GREATER THAN n, OTHERWISE
! symtrid_cmp2 WILL STOP WITH AN ERROR MESSAGE.
!
    integer(i4b), parameter :: prtunit=6, m=3000, n=1000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character, parameter :: sort='d'
!
    character(len=*), parameter :: name_proc='Example 2 of symtrid_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e
    real(stnd), allocatable, dimension(:,:) :: a, at, ata, resid
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, failure
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : EIGEN DECOMPOSITION OF A REAL SYMMETRIC MATRIX PRODUCT,
!               a**(t)*a, USING THE ONE-SIDED RALHA METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A m-by-n RANDOM DATA MATRIX a .
!
    call random_number( a )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( ata(n,n), at(n,m), resid(n,n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
        at(:n,:m) = transpose( a(:m,:n) )
!
!       COMPUTE THE SYMMETRIC MATRIX CROSS-PRODUCT.
!
        ata(:n,:n) = matmul( at(:n,:m), a(:m,:n) )
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   CALL symtrid_cmp2 AND ortho_gen_symtrid TO REDUCE THE MATRIX CROSS-PRODUCT TO TRIDIAGONAL FORM
!
!                      a**(t)*a = Q*TRID*Q**(t)
!
!   WHERE Q IS ORTHOGONAL AND TRID IS A SYMMETRIC TRIDIAGONAL MATRIX.
!
!   ON ENTRY OF symtrid_cmp2, a MUST CONTAINS THE INITIAL m-by-n MATRIX USED
!   FOR COMPUTING THE MATRIX CROSS-PRODUCT. THE ORTHOGONAL MATRIX Q IS STORED
!   IN FACTORED FORM IF THE LOGICAL ARGUMENT store_q IS SET TO true.
!
    call symtrid_cmp2( a(:m,:n), d, e, store_q=true )
!
!   ON EXIT OF symtrid_cmp2:
!
!         ARGUMENTS d AND e CONTAIN, RESPECTIVELY, THE DIAGONAL AND
!         OFF-DIAGONAL ELEMENTS OF THE TRIDIAGONAL MATRIX TRID.
!
!         IF THE LOGICAL ARGUMENT store_q IS SET TO TRUE ON ENTRY,
!         THE LEADING n-BY-n LOWER TRIANGULAR PART OF a IS OVERWRITTEN
!         BY THE MATRIX Q AS A PRODUCT OF ELEMENTARY REFLECTORS.
!
!   ortho_gen_symtrid GENERATES THE MATRIX Q STORED AS A PRODUCT OF
!   ELEMENTARY REFLECTORS AFTER A CALL TO symtrid_cmp2 WITH store_q=true.
!
    call ortho_gen_symtrid( a(:n,:n), false )
!
!   COMPUTE EIGENDECOMPOSITION OF a**(t)*a WITH SUBROUTINE symtrid_qri2:
!
!                            a**(t)*a = U*D*U**(t)
!
!   WHERE U ARE THE EIGENVECTORS AND D IS THE DIAGONAL MATRIX, WITH
!   EIGENVALUES ON THE DIAGONAL.
!
    call symtrid_qri2( d(:n), e(:n), failure, a(:n,:n), sort=sort )
!
!   ON EXIT OF symtrid_qri2:
!
!         ARGUMENTS d AND a(:n:n) CONTAIN, RESPECTIVELY, THE EIGENVALUES AND
!         EIGENVECTORS OF a**(t)*a.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:n) = matmul( ata(:n,:n), a(:n,:n)  )    &
                       - a(:n,:n)*spread( d(:n), 1, n )
!
        e(:n) = norm( resid(:n,:n), dim=2_i4b )
        err1 =  maxval( e(:n) )/( norm( ata )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( ata(:n,:n) )
!
        at(:n,:n) = transpose( a(:n,:n) )
!
        resid(:n,:n) = abs( ata(:n,:n) - matmul( at(:n,:n), a(:n,:n) ) )
        err2 = maxval( resid(:n,:n) )/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( ata, resid, at )
!
    endif
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, d, e )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps  .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri2() ) = ', failure
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigendecomposition a**(t)*a = U*D*U**(t) = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors U**(t)*U - I           = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing the eigendecomposition of a ',       &
       n, ' by ', n,' real symmetric matrix product is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_symtrid_cmp2
! ===============================
!
end program ex2_symtrid_cmp2

ex2_symtrid_qri.F90

program ex2_symtrid_qri
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_QRI
!   in module Eig_Procedures for computing all the eigenvalues of a symmetric
!   tridiagonal matrix by the Pal-Walker-Kahan variant of the QR algorithm
!   with implicit shift.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module Eig_Procedures
!    for computing all or selected eigenvectors of a symmetric tridiagonal matrix by
!    inverse iterations.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack,  only : lgl, i4b, stnd, true, false, zero, one, two, c50,  &
                          allocate_error, merror, trid_inviter, symtrid_qri, &
                          norm, unit_matrix 
#ifdef _MATMUL
    use Statpack,  only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Utilities, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! n IS THE DIMENSION OF THE SYMMETRIC TRIDIAGONAL MATRIX;
! neig IS THE NUMBER OF EIGENVECTORS WHICH ARE COMPUTED
! BY INVERSE ITERATIONS;
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=4000, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of symtrid_qri'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, e2, eigval
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, eigvec
!
    integer(i4b) :: l
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE EIGENVALUES OF A SYMMETRIC TRIDIAGONAL MATRIX
!               USING THE IMPLICIT QR METHOD AND SELECTED EIGENVECTORS BY
!               INVERSE ITERATION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( eps )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,neig), d(n), e(n), e2(n), eigval(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A (1-2-1) TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    d(:n) = two
    e(:n) = one
!
!    call random_number( d(:n) )
!    call random_number( e(:n) )
!
!   SAVE THE TRIDIAGONAL FORM FOR LATER USE.
!
    eigval(:n) = d(:n)
    e2(:n)     = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE EIGENVALUES OF THE SYMMETRIC TRIDIAGONAL MATRIX.
!
    call symtrid_qri( eigval(:n), e2(:n), failure, sort=sort )
!
!   ON EXIT, THE COMPUTED EIGENVALUES ARE STORED IN eigval(:n)
!   AND THE SYMMETRIC TRIDIAGONAL MATRIX IS OVERWRITTEN.
!
!   ON EXIT OF symtrid_qri :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE IMPLICIT
!                         QR ALGORITHM.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   COMPUTE THE FIRST neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY maxiter INVERSE ITERATIONS.
!
    call trid_inviter( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure2, maxiter=maxiter )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), a2(neig,neig), resid(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       FORM THE TRIDIAGONAL MATRIX.
!
        do l = 1_i4b, n-1_i4b
!
            a(l,l)       = d(l)
            a(l+1_i4b,l) = e(l)
            a(l,l+1_i4b) = e(l)
!
        end do
!
        a(n,n) = d(n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        resid = matmul( a, eigvec ) - eigvec*spread( eigval(:neig), 1, n )
!
        err1 = norm(resid)/( norm(a)*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        call unit_matrix( a2 )
!
        resid(:neig,:neig) = a2 - matmul( transpose( eigvec ), eigvec )
!
        err2 = norm(resid(:neig,:neig))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( eigvec, d, e, e2, eigval )
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri()  ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_symtrid_qri
! ==============================
!
end program ex2_symtrid_qri

ex2_symtrid_qri2.F90

program ex2_symtrid_qri2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SYMTRID_QRI2
!   in module Eig_Procedures for computing all the eigenvalues of a symmetric
!   tridiagonal matrix by a Pal-Walker-Kahan variant of the QR algorithm
!   with implicit shift. This variant is slightly different from the one
!   used in SYMTRID_QRI subroutine.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine TRID_INVITER in module Eig_Procedures
!    for computing all or selected eigenvectors of a symmetric tridiagonal matrix by
!    inverse iterations.
!
!
! LATEST REVISION : 05/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack,  only : lgl, i4b, stnd, true, false, zero, one, two, c50,   &
                          allocate_error, merror, trid_inviter, symtrid_qri2, &
                          norm, unit_matrix 
#ifdef _MATMUL
    use Statpack,  only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Utilities, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT;
! n IS THE DIMENSION OF THE SYMMETRIC TRIDIAGONAL MATRIX;
! neig IS THE NUMBER OF EIGENVECTORS WHICH ARE COMPUTED
! BY INVERSE ITERATIONS;
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=4000, neig=100, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of symtrid_qri2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, e2, eigval
    real(stnd), allocatable, dimension(:,:) :: a, a2, resid, eigvec
!
    integer(i4b) :: l
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: failure, failure2, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 1 : COMPUTE THE EIGENVALUES OF A SYMMETRIC TRIDIAGONAL MATRIX
!               USING THE IMPLICIT QR METHOD AND SELECTED EIGENVECTORS BY
!               INVERSE ITERATION.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( eps )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( eigvec(n,neig), d(n), e(n), e2(n), eigval(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A (1-2-1) TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
    d(:n) = two
    e(:n) = one
!
!    call random_number( d(:n) )
!    call random_number( e(:n) )
!
!   SAVE THE TRIDIAGONAL FORM FOR LATER USE.
!
    eigval(:n) = d(:n)
    e2(:n)     = e(:n)
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE EIGENVALUES OF THE SYMMETRIC TRIDIAGONAL MATRIX.
!
    call symtrid_qri2( eigval(:n), e2(:n), failure, sort=sort )
!
!   ON EXIT, THE COMPUTED EIGENVALUES ARE STORED IN eigval(:n)
!   AND THE SYMMETRIC TRIDIAGONAL MATRIX IS OVERWRITTEN.
!
!   ON EXIT OF symtrid_qri2 :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE IMPLICIT
!                         QR ALGORITHM.
!
!   IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   COMPUTE THE FIRST neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY maxiter INVERSE ITERATIONS.
!
    call trid_inviter( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure2, maxiter=maxiter )
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), a2(neig,neig), resid(n,neig), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       FORM THE TRIDIAGONAL MATRIX.
!
        do l = 1_i4b, n-1_i4b
!
            a(l,l)       = d(l)
            a(l+1_i4b,l) = e(l)
            a(l,l+1_i4b) = e(l)
!
        end do
!
        a(n,n) = d(n)
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        resid = matmul( a, eigvec ) - eigvec*spread( eigval(:neig), 1, n )
!
        err1 = norm(resid)/( norm(a)*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        call unit_matrix( a2 )
!
        resid(:neig,:neig) = a2 - matmul( transpose( eigvec ), eigvec )
!
        err2 = norm(resid(:neig,:neig))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a, a2, resid )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( eigvec, d, e, e2, eigval )
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_qri2()  ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter()  ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', neig, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_symtrid_qri2
! ===============================
!
end program ex2_symtrid_qri2

ex2_trid_deflate.F90

program ex2_trid_deflate
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_DEFLATE
!   in module Eig_Procedures for computing all or selected eigenvectors of a real
!   symmetric matrix stored in packed form by a deflation technique.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutines SYMTRID_CMP and SYMTRID_BISECT
!    in module EIG_Procedures for reducing a real symmetric matrix stored in packed form to
!    tridiagonal form and computing selected eigenvalues of a real symmetric (tridiagonal) matrix
!    by a bisection method.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, safmin, triangle, trid_deflate,   &
                         symtrid_cmp, symtrid_bisect, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE SYMMETRIX MATRIX,
! neig IS THE NUMBER OF THE WANTED EIGENVALUES/EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=(n*(n+1))/2, nvec=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of trid_deflate'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, abstol, elapsed_time
    real(stnd), allocatable, dimension(:,:) :: a, resid, eigvec
    real(stnd), allocatable, dimension(:)   :: a_packed, eigval, resid2, d, e
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: max_qr_steps, i, neig
!
    logical(lgl)  :: failure, failure2, do_test, ortho
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : EIGENVALUES AND, OPTIONALLY, SELECTED EIGENVECTORS
!               OF A REAL SYMMETRIC MATRIX IN PACKED FORM USING A
!               DEFLATION METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   DETERMINE PARAMETERS OF THE DEFLATION ALGORITHM.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED EIGENVECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF EIGENVALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE EIGENVECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = true
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A TRIDIAGONAL MATRIX FOR A GIVEN EIGENVALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL REQUESTED
!   EIGENVALUES EXCEEDS max_qr_steps * neig.
!
    max_qr_steps = 3_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a_packed(p), eigvec(n,nvec), eigval(n), d(n), e(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM SELF-ADJOINT MATRIX a IN PACKED FORM.
!
    call random_number( a_packed(:p) )
!
!   MAKE THE MATRIX POSITIVE DEFINITE ASSUMING THAT THE UPPER TRIANGLE OF
!   THE SELF-ADJOINT MATRIX IS STORED IN PACKED FORM.
!
    do i = 1_i4b, n
        a_packed(i+((i-1_i4b)*i/2_i4b)) = real( n, stnd )
    end do
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), resid(n,nvec), resid2(nvec), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       UNPACKED AND SAVE RANDOM SELF-ADJOINT MATRIX a 
!       ASSUMING THAT THE UPPER TRIANGLE OF THE SELF-ADJOINT
!       MATRIX IS STORED IN PACKED FORM.
!
        a(:n,:n) = unpack( a_packed(:p), mask=triangle( true, n, n, extra=1_i4b), field=zero )
!
        do i = 1_i4b, n-1_i4b
             a(i+1_i4b:n,i) = a(i,i+1_i4b:n)
        end do
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST REDUCE THE SYMMETRIC MATRIX (STORED IN PACKED FORM) TO SYMMETRIC TRIDIAGONAL
!   FORM BY ORTHOGONAL TRANSFORMATIONS WITH SUBROUTINE symtrid_cmp. THE ORTHOGONAL
!   TRANSFORMATIONS ARE SAVED IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE.
!
    call symtrid_cmp( a_packed(:p), d(:n), e(:n), store_q=true )
!
!   ON EXIT OF symtrid_cmp:
!
!       a_packed IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS USED TO REDUCE a
!       TO TRIDIAGONAL FORM IF THE OPTIONAL PARAMETER store_q IS SET TO TRUE,
!       OTHERWISE a IS DESTROYED.
!
!       ARGUMENTS d and e CONTAIN, RESPECTIVELY THE DIAGONAL AND OFF-DIAGONAL
!       ELEMENTS OF THE TRIDIAGONAL MATRIX.
!
!   SECOND, COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a TO HIGH
!   ACCURACY WITH SUBROUTINE symtrid_bisect.
!
    call symtrid_bisect( d(:n), e(:n), neig, eigval(:n), failure, &
                         sort=sort, abstol=abstol )
!
!   ON EXIT OF symtrid_bisect:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a .
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   NEXT COMPUTE THE FIRST nvec EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY A
!   DEFLATION TECHNIQUE APPLIED ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION.
!
!   ON ENTRY OF SUBROUTINE trid_deflate, PARAMETER eigval CONTAINS SELECTED
!   EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_deflate( d(:n), e(:n), eigval(:nvec), eigvec(:n,:nvec), failure=failure2,     &
                       matp=a_packed, ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF trid_deflate :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM
!                         ALGORITHM.
!
!       eigvec CONTAINS THE nvec EIGENVECTORS OF a ASSOCIATED WITH THE EIGENVALUES eigval(:nvec).
!
!   trid_deflate MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL AND IF THESE EIGENVALUES ARE NOT COMPUTED WITH HIGH ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:nvec) = matmul( a, eigvec ) - eigvec*spread( eigval(:nvec), 1, n )
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 =  maxval( resid2(:nvec) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:nvec,:nvec) )
!
        resid(:nvec,:nvec) = abs( a(:nvec,:nvec) - matmul( transpose( eigvec ), eigvec ) )
!
        err2 = maxval(resid(:nvec,:nvec))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, eigvec, eigval, d, e, a, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, eigvec, eigval, d, e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_deflate()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', nvec, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_trid_deflate
! ===============================
!
end program ex2_trid_deflate

ex2_trid_inviter.F90

program ex2_trid_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_INVITER
!   in module Eig_Procedures for computing all or selected eigenvectors of a real
!   symmetric matrix stored in packed form by inverse iterations.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine EIGVAL_CMP in module EIG_Procedures
!    for reducing a real symmetric matrix stored in packed form to tridiagonal form and computing
!    selected eigenvalues of a real symmetric (tridiagonal) matrix by the fast Pal-Walker-Kahan
!    variant of the QR method with implicit shift.
!
!
! LATEST REVISION : 05/12/2023
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, c50, triangle, trid_inviter,   &
                         eigval_cmp, norm, unit_matrix, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED MATRIX,
! nvec IS THE NUMBER OF THE WANTED EIGENVECTORS COMPUTED BY INVERSE ITERATIONS,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE THE TOP neig EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, p=(n*(n+1))/2, nvec=3000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 2 of trid_inviter'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, elapsed_time
    real(stnd), allocatable, dimension(:,:) :: a, resid, eigvec, d_e
    real(stnd), allocatable, dimension(:)   :: a_packed, eigval, resid2
!
    integer       :: iok, istart, iend, irate, imax, itime
    integer(i4b)  :: i
!
    logical(lgl)  :: failure1, failure2, do_test
!   
    character     :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 2 : EIGENVALUES AND, OPTIONALLY, SELECTED EIGENVECTORS OF
!               A REAL SYMMETRIC MATRIX STORED IN PACKED FORM USING
!               THE INVERSE ITERATION METHOD.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a_packed(p), eigvec(n,nvec), eigval(n), d_e(n,2), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM SELF-ADJOINT MATRIX a IN PACKED FORM.
!
    call random_number( a_packed(:p) )
!
!   MAKE a POSITIVE DEFINITE ASSUMING THAT THE UPPER TRIANGLE OF
!   THE SELF-ADJOINT MATRIX IS STORED IN PACKED FORM.
!
    do i = 1_i4b, n
        a_packed(i+((i-1_i4b)*i/2_i4b)) = real( n, stnd )
    end do
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a(n,n), resid(n,nvec), resid2(nvec), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       UNPACKED AND SAVE RANDOM SELF-ADJOINT MATRIX a 
!       ASSUMING THAT THE UPPER TRIANGLE OF THE SELF-ADJOINT
!       MATRIX IS STORED IN PACKED FORM.
!
        a(:n,:n) = unpack( a_packed(:p), mask=triangle( true, n, n, extra=1_i4b), field=zero )
!
        do i = 1_i4b, n-1_i4b
             a(i+1_i4b:n,i) = a(i,i+1_i4b:n)
        end do
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   THE EIGENVALUE DECOMPOSITION OF A REAL n-BY-n SYMMETRIC MATRIX a
!   IS WRITTEN
!
!                       a = U * D * U**(t)
!
!   WHERE D IS AN n-BY-n DIAGONAL MATRIX AND U IS AN n-BY-n ORTHOGONAL MATRIX.
!   THE DIAGONAL ELEMENTS OF D ARE THE EIGENVALUES OF a; THEY ARE REAL.
!   THE COLUMNS OF U ARE THE EIGENVECTORS OF a.
!
!   FIRST, COMPUTE ALL EIGENVALUES OF THE SELF-ADJOINT MATRIX a IN PACKED FORM
!   AND SAVE THE INTERMEDIATE TRIDIAGONAL MATRIX IN PARAMETER d_e WITH
!   SUBROUTINE eigval_cmp.
!
    call eigval_cmp( a_packed, eigval, failure=failure1, sort=sort, d_e=d_e )
!
!   ON EXIT OF eigval_cmp:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION 
!                         OF AN INTERMEDIATE TRIDIAGONAL FORM TRID OF a .
!
!       a_packed IS OVERWRITTEN WITH THE HOUSEHOLDER TRANSFORMATIONS USED TO REDUCE a
!       TO TRIDIAGONAL FORM IF THE OPTIONAL PARAMETER d_e IS PRESENT, OTHERWISE
!       a_packed IS DESTROYED.
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF a.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!       d_e IS AN OPTIONAL ARGUMENT TO SAVE THE INTERMEDIATE TRIDIAGONAL FORM OF a.
!
!   NEXT COMPUTE THE FIRST nvec EIGENVECTORS OF THE SELF-ADJOINT MATRIX a BY
!   maxiter INVERSE ITERATIONS ON THE INTERMEDIATE TRIDIAGONAL MATRIX
!   d_e AND BACK-TRANSFORMATION.
!
    call trid_inviter( d_e(:n,1), d_e(:n,2), eigval(:nvec), eigvec(:n,:nvec), failure=failure2,   &
                       matp=a_packed, maxiter=maxiter )
!
!   ON EXIT OF trid_inviter :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE nvec EIGENVECTORS OF a ASSOCIATED WITH THE EIGENVALUES eigval(:nvec).
!
!   trid_inviter MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*U - U*D
!
        resid(:n,:nvec) = matmul( a, eigvec ) - eigvec*spread( eigval(:nvec), 1, n )
        resid2(:nvec) = norm( resid(:n,:nvec), dim=2_i4b )
!
        err1 =  maxval( resid2(:nvec) )/( norm( a )*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - U**(t)*U.
!
        call unit_matrix( a(:nvec,:nvec) )
!
        resid(:nvec,:nvec) = abs( a(:nvec,:nvec) - matmul( transpose( eigvec ), eigvec ) )
!
        err2 = maxval(resid(:nvec,:nvec))/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, eigvec, eigval, d_e, a, resid, resid2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a_packed, eigvec, eigval, d_e )
!
    end if
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( err<=eps .and. .not.failure1 .and. .not.failure2 ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from eigval_cmp()   ) = ', failure1
    write (prtunit,*) ' FAILURE ( from trid_inviter() ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing all eigenvalues and ', nvec, ' eigenvectors of a ', &
       n, ' by ', n,' real symmetric matrix in packed form is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex2_trid_inviter
! ===============================
!
end program ex2_trid_inviter

ex3_llsq_qr_solve.F90

program ex3_llsq_qr_solve
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE
!   in module LLSQ_Procedures for solving linear least squares problems.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c100, allocate_error, &
                         merror, norm, llsq_qr_solve
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ELEMENTS OF THE RANDOM VECTOR.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=1000, nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 3 of llsq_qr_solve'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, err1, err2, elapsed_time
    real(stnd), allocatable, dimension(:)   :: a, x, bnorm, rnorm
    real(stnd), allocatable, dimension(:,:) :: b, resid
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-ELEMENTS REAL COEFFICIENT
!               VECTOR AND MULTIPLE RIGHT HAND SIDES.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m)*x(:nrhs) ≈ b(:m,:nrhs) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m), b(m,nrhs), resid(m,nrhs), x(nrhs), bnorm(nrhs), rnorm(nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT VECTOR a(:m) .
!
    call random_number( a(:m) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
!   COMPUTE THE NORMS OF THE nrhs DEPENDENT VARIABLES b .
!
    bnorm(:nrhs) = norm( b(:m,:nrhs), dim=2_i4b )
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM USING SUBROUTINE llsq_qr_solve.
!
    call llsq_qr_solve( a(:m), b(:m,:nrhs), x(:nrhs), rnorm=rnorm(:nrhs), &
                        resid=resid(:m,:nrhs) )
!
!   llsq_qr_solve COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED. a AND b ARE NOT OVERWRITTEN BY llsq_qr_solve.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b CAN BE HANDLED IN A SINGLE CALL;
!   THEY ARE STORED AS THE COLUMNS OF THE m-BY-nrhs RIGHT HAND SIDE MATRIX b.
!   IN THAT CASE, x MUST BE A nrhs-ELEMENTS VECTOR.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR (OR SCALAR) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve .
!   
!   IF THE OPTIONAL REAL ARRAY ARGUMENT resid IS PRESENT IN THE CALL OF llsq_qr_solve,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND ON EXIT
!
!                               resid = b - a*x .
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE THE COEFFICIENT VECTOR a .
!
        err1 = maxval( abs( matmul( a, resid ) ) )/ sum( abs(a) )
!
!       CHECK THE NORMS OF THE COLUMNS OF RESIDUAL MATRIX.
!
        err2 = maxval( abs( norm( resid(:m,:nrhs), dim=2_i4b ) - rnorm(:nrhs) ) )
!
        err = max( err1, err2 )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*)
    write (prtunit,*) 'Least squares solution of min of ||a(:)*x(:)-b(:,:)||**2 for vector x(:)'
    write (prtunit,*)
    write (prtunit,*) 'Residual sum of squares     ||a*x(i)-b(:,i)||**2               :',rnorm(:nrhs)**2
    write (prtunit,*) 'Residual sum of squares (%) ||a*x(i)-b(:,i)||**2/||b(:,i)||**2 :',(rnorm(:nrhs)/bnorm(:nrhs))**2
    write (prtunit,*)
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' real coefficient vector and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, resid, x, bnorm, rnorm )
!
!
! END OF PROGRAM ex3_llsq_qr_solve
! ================================
!
end program ex3_llsq_qr_solve

ex3_llsq_qr_solve2.F90

program ex3_llsq_qr_solve2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine LLSQ_QR_SOLVE2
!   in module LLSQ_Procedures for solving linear least squares problems.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c100, allocate_error, &
                         merror, norm, llsq_qr_solve2
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ELEMENTS OF THE RANDOM VECTOR.
! nrhs IS THE NUMBER OF COLUMNS OF THE RIGHT HAND SIDE MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=1000, nrhs=10
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 3 of llsq_qr_solve2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, err, err1, err2, elapsed_time
    real(stnd), allocatable, dimension(:)   :: a, a2, x, bnorm, rnorm
    real(stnd), allocatable, dimension(:,:) :: b
!
    integer :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: do_test, comp_resid
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING LINEAR LEAST SQUARES PROBLEMS WITH A m-ELEMENTS REAL COEFFICIENT
!               VECTOR AND MULTIPLE RIGHT HAND SIDES.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m)*x(:nrhs) ≈ b(:m,:nrhs) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   SPECIFY IF RESIDUALS MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m), b(m,nrhs), x(nrhs), bnorm(nrhs), rnorm(nrhs), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT VECTOR a(:m) .
!
    call random_number( a(:m) )
!
!   GENERATE A RANDOM RIGHT HAND-SIDE MATRIX b(:m,:nrhs) .
!
    call random_number( b(:m,:nrhs) )
!
!   COMPUTE THE NORMS OF THE nrhs DEPENDENT VARIABLES b .
!
    bnorm(:nrhs) = norm( b(:m,:nrhs), dim=2_i4b )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( a2(m), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE DATA VECTOR FOR LATER USE.
!
        a2(:m) = a(:m)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   SOLVE THE LINEAR LEAST SQUARES PROBLEM USING SUBROUTINE llsq_qr_solve2.
!
    call llsq_qr_solve2( a(:m), b(:m,:nrhs), x(:nrhs), rnorm=rnorm(:nrhs), &
                        comp_resid=comp_resid )
!
!   llsq_qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OF a. a IS A m-ELEMENTS
!   VECTOR OR AN m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. IF a IS A MATRIX, m>=n OR
!   n>m IS PERMITTED. a AND b ARE OVERWRITTEN BY llsq_qr_solve2.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b CAN BE HANDLED IN A SINGLE CALL;
!   THEY ARE STORED AS THE COLUMNS OF THE m-BY-nrhs RIGHT HAND SIDE MATRIX b.
!   IN THAT CASE, x MUST BE A nrhs-ELEMENTS VECTOR.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR (Or SCALAR) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF llsq_qr_solve2 .
!   
!   IF THE OPTIONAL LOGICAL ARGUMENT comp_resid IS PRESENT WITH THE VALUE true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b ON EXIT.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE THE COEFFICIENT VECTOR a .
!
        err1 = maxval( abs( matmul( a2, b ) ) )/ sum( abs(a2) )
!
!       CHECK THE NORMS OF THE COLUMNS OF RESIDUAL MATRIX.
!
        err2 = maxval( abs( norm( b(:m,:nrhs), dim=2_i4b ) - rnorm(:nrhs) ) )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAY.
!
        deallocate( a2 )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*)
    write (prtunit,*) 'Least squares solution of min of ||a(:)*x(:)-b(:,:)||**2 for vector x(:)'
    write (prtunit,*)
    write (prtunit,*) 'Residual sum of squares     ||a*x(i)-b(:,i)||**2               :',rnorm(:nrhs)**2
    write (prtunit,*) 'Residual sum of squares (%) ||a*x(i)-b(:,i)||**2/||b(:,i)||**2 :',(rnorm(:nrhs)/bnorm(:nrhs))**2
    write (prtunit,*)
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
      m, ' real coefficient vector and a ', m, ' by ', nrhs,       &
      ' right hand side matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, bnorm, rnorm )
!
!
! END OF PROGRAM ex3_llsq_qr_solve2
! =================================
!
end program ex3_llsq_qr_solve2

ex3_partial_qr_cmp.F90

program ex3_partial_qr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutines PARTIAL_QR_CMP in module
!   QR_Procedures and QR_SOLVE2 in module LLSQ_Procedures for computing a QR decomposition
!   with column pivoting or a complete orthogonal decomposition of a data matrix 
!   and solving an associated linear least squares problem.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, merror, &
                         partial_qr_cmp, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 3 of partial_qr_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta, tau
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A QR DECOMPOSITION WITH COLUMN PIVOTING OR COMPLETE
!               ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX AND ONE RIGHT HAND SIDE.
!
!               COMPUTE SOLUTION VECTOR x FOR LINEAR LEAST SQUARES SYSTEM:
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!
!   SET THE TOLERANCE AND REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = sqrt( fudge*ulp )
!
    err      = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE COMPUTED QR APPROXIMATION IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), tau(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A QR DECOMPOSITION WITH COLUMN PIVOTING OR COMPLETE ORTHOGONAL
!   DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE partial_qr_cmp.
!
    call partial_qr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol, tau=tau )
!
!   partial_qr_cmp COMPUTES A (PARTIAL OR COMPLETE) ORTHOGONAL FACTORIZATION
!   OF A REAL m-BY-n MATRIX. THE INPUT MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_qr_cmp.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11),
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!                      
!   IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_qr_cmp COMPUTES A (PARTIAL OR COMPLETE)
!   ORTHOGONAL FACTORIZATION OF a FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a .
!
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!
!   NEXT, COMPUTE THE SOLUTION VECTOR FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH SUBROUTINE qr_solve2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m), x(:n),  &
                    tau=tau, rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a AS COMPUTED BY qr_cmp2, partial_qr_cmp,
!   partial_rqr_cmp AND partial_rqr_cmp2.
!
!   a IS A m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. FURTHERMORE m>=n
!   OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF partial_qr_cmp AND qr_solve2. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vector ||a*x-b||_2    = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, tau, ip )
!
!
! END OF PROGRAM ex3_partial_qr_cmp
! =================================
!
end program ex3_partial_qr_cmp

ex3_partial_rqr_cmp.F90

program ex3_partial_rqr_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine PARTIAL_RQR_CMP in module Random
!   and QR_SOLVE2 in module LLSQ_Procedures for computing a randomized QR decomposition
!   with column pivoting or a randomized complete orthogonal decomposition of a data matrix 
!   and solving an associated linear least squares problem.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, merror, &
                         partial_rqr_cmp, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 3 of partial_rqr_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta, tau
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b)                            :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A RANDOMIZED QR DECOMPOSITION WITH COLUMN PIVOTING OR A
!               COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX AND ONE
!               RIGHT HAND SIDE.
!
!               COMPUTE SOLUTION VECTOR x FOR LINEAR LEAST SQUARES SYSTEM:
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE TOLERANCE AND REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = sqrt( fudge*ulp )
!
    err = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE COMPUTED QR APPROXIMATION IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), tau(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED QR DECOMPOSITION WITH COLUMN PIVOTING OR COMPLETE ORTHOGONAL
!   DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE partial_rqr_cmp.
!
    call partial_rqr_cmp( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol, tau=tau )
!
!   partial_rqr_cmp COMPUTES A RANDOMIZED (PARTIAL OR COMPLETE) ORTHOGONAL FACTORIZATION
!   OF A REAL m-BY-n MATRIX. THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_rqr_cmp.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11),
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!                      
!   IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_rqr_cmp COMPUTES A (PARTIAL OR COMPLETE)
!   ORTHOGONAL FACTORIZATION OF a FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a .
!
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!
!   NEXT, COMPUTE THE SOLUTION VECTOR FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH SUBROUTINE qr_solve2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m), x(:n),  &
                    tau=tau, rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a AS COMPUTED BY qr_cmp2, partial_qr_cmp,
!   partial_rqr_cmp AND partial_rqr_cmp2.
!
!   a IS A m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. FURTHERMORE m>=n
!   OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF partial_rqr_cmp AND qr_solve2. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vector ||a*x-b||_2    = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, tau, ip )
!
!
! END OF PROGRAM ex3_partial_rqr_cmp
! ==================================
!
end program ex3_partial_rqr_cmp

ex3_partial_rqr_cmp2.F90

program ex3_partial_rqr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine PARTIAL_RQR_CMP2 in module Random
!   and QR_SOLVE2 in module LLSQ_Procedures for computing a randomized QR decomposition
!   with column pivoting or a randomized complete orthogonal decomposition of a data matrix 
!   and solving an associated linear least squares problem.
!
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c50, allocate_error, merror, &
                         partial_rqr_cmp2, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=5000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 3 of partial_rqr_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta, tau
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b) :: krank, l, j, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer      :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A RANDOMIZED QR DECOMPOSITION WITH COLUMN PIVOTING OR A
!               COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX AND ONE
!               RIGHT HAND SIDE.
!
!               COMPUTE SOLUTION VECTOR x FOR LINEAR LEAST SQUARES SYSTEM:
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE TOLERANCE AND REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( ulp )
    eps = sqrt( fudge*ulp )
!
    err = zero
    test_lin = true
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE COMPUTED QR APPROXIMATION IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), tau(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       SAVE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A RANDOMIZED QR DECOMPOSITION WITH COLUMN PIVOTING OR COMPLETE ORTHOGONAL
!   DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE partial_rqr_cmp2.
!
    call partial_rqr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol, tau=tau )
!
!   partial_rqr_cmp2 COMPUTES A RANDOMIZED (PARTIAL OR COMPLETE) ORTHOGONAL FACTORIZATION
!   OF A REAL m-BY-n MATRIX. THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE FIRST COMPUTES A RANDOMIZED QR FACTORIZATION WITH COLUMN PIVOTING OF a AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF partial_rqr_cmp2.
!
!   IN OTHER WORDS, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR FACTORIZATION:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G. THE RANK OF R11),
!                      
!   IF tol IS PRESENT AND IS IN ]0,1[, THEN :
!       CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF THE SUBMATRIX R11
!       ARE PERFORMED. THEN, tol IS USED TO DETERMINE THE EFFECTIVE RANK
!       OF R (and a), WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER (IN THE 1-NORM) IS LESS THAN 1/tol.
!       ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER IS RETURNED IN tol.
!
!   IF tol IS PRESENT AND IS EQUAL TO 0, THEN :
!       THE NUMERICAL RANK OF R (and a) IS DETERMINED, E.G., CRUDE TESTS ON R(j,j) ARE
!       DONE TO DETERMINE THE RANK OF R AND tol IS NOT CHANGED ON EXIT.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF R11 ARE NOT
!       PERFORMED AND THE RANK OF R IS ASSUMED TO BE EQUAL TO mn = min(m,n).
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!                      
!   IF THE OPTIONAL PARAMETER tau IS PRESENT, partial_rqr_cmp2 COMPUTES A (PARTIAL OR COMPLETE)
!   ORTHOGONAL FACTORIZATION OF a FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a .
!
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!
!   NEXT, COMPUTE THE SOLUTION VECTOR FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH SUBROUTINE qr_solve2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m), x(:n),  &
                    tau=tau, rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING AN ORTHOGONAL FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a AS COMPUTED BY qr_cmp2, partial_qr_cmp,
!   partial_rqr_cmp AND partial_rqr_cmp2.
!
!   a IS A m-BY-n MATRIX WHICH MAY BE RANK-DEFICIENT. FURTHERMORE m>=n
!   OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF partial_rqr_cmp2 AND qr_solve2. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULT OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vector ||a*x-b||_2    = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, tau, ip )
!
!
! END OF PROGRAM ex3_partial_rqr_cmp2
! ===================================
!
end program ex3_partial_rqr_cmp2

ex3_qr_cmp2.F90

program ex3_qr_cmp2
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine QR_CMP2 in module QR_Procedures
!   and QR_SOLVE2 in module LLSQ_Procedures for solving full or deficient linear least squares
!   problems using a complete orthogonal decomposition of the coefficient matrix.
!
!
! LATEST REVISION : 15/11/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : i4b, lgl, stnd, zero, true, false, c100, allocate_error, merror, &
                         qr_cmp2, qr_solve2, norm
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT.
! m IS THE NUMBER OF ROWS OF THE RANDOM MATRIX.
! n IS THE NUMBER OF COLUMNS OF THE RANDOM MATRIX.
!
    integer(i4b), parameter :: prtunit=6, m=4000, n=3000, mn=min( m, n )
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c100
!
    character(len=*), parameter :: name_proc='Example 3 of qr_cmp2'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: ulp, eps, tol, anorm, rnorm, err1, err2, err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: b, x, res, diagr, beta, tau
    real(stnd), allocatable, dimension(:,:) :: a, a2
!
    integer(i4b)                            :: krank, j, l, idep
    integer(i4b), allocatable, dimension(:) :: ip
    integer                                 :: iok, istart, iend, irate, imax, itime
!
    logical(lgl) :: comp_resid, do_test, test_lin
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SOLVING A LINEAR LEAST SQUARES PROBLEM WITH A m-BY-n REAL COEFFICIENT
!               MATRIX USING A COMPLETE ORTHOGONAL DECOMPOSITION OF THE COEFFICIENT MATRIX.
!
!               COMPUTE SOLUTION VECTOR FOR LINEAR LEAST SQUARES SYSTEM
!
!                              a(:m,:n)*x(:n) ≈ b(:m) .
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    ulp = epsilon( err )
    eps = fudge*ulp
!
    err = zero
!
!   SET TOLERANCE FOR DETERMINING THE RANK OF THE MATRIX IN THE SUBROUTINE.
!
!    tol = 0.0000001_stnd
    tol = sqrt( ulp )
!
!   SPECIFY IF NUMERICAL TESTS ARE PERFORMED TO TEST THE ACCURACY OF THE SUBROUTINE.
!
    do_test = true
!
!   DECIDE IF COLUMN PIVOTING MUST BE PERFORMED AND
!   IF RESIDUAL VECTOR MUST BE COMPUTED.
!
    krank = 0
!
    comp_resid = do_test
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), b(m), x(n), diagr(mn), beta(mn), tau(mn), ip(n), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM COEFFICIENT MATRIX a(:m,:n) .
!
    call random_number( a(:m,:n) )
!
!   GENERATE A DEFICIENT COEFFICIENT MATRIX a(:m,:n) WITH RANK min(mn-1,n) .
!
    idep = min( n, 5_i4b ) 
    a(:m,idep) = a(:m,1_i4b) + a(:m,n)
!
!   GENERATE A RANDOM RIGHT HAND-SIDE VECTOR b(:m) .
!
    call random_number( b(:m) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS FOR LATER USE.
!
        allocate( a2(m,n), res(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       COMPUTE FROBENIUS NORM OF THE DATA MATRIX.
!
        anorm = norm( a(:m,:n) )
!
!       MAKE A COPY OF THE DATA MATRIX FOR LATER USE.
!
        a2(:m,:n) = a(:m,:n)
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE A COMPLETE ORTHOGONAL DECOMPOSITION OF RANDOM DATA MATRIX WITH SUBROUTINE qr_cmp2.
!
    call qr_cmp2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, tol=tol, tau=tau(:mn) )
!
!   qr_cmp2 COMPUTES A QR OR COMPLETE ORTHOGONAL FACTORIZATION OF A REAL m-BY-n MATRIX.
!   THE MATRIX MAY BE RANK-DEFICIENT.
!
!   HERE THE ROUTINE COMPUTES A COMPLETE ORTHOGONAL FACTORIZATION OF a.
!
!   A QR FACTORIZATION WITH COLUMN PIVOTING OF a IS FIRST COMPUTED AS:
!
!                     a * P = Q * R = Q * [ R11 R12 ]
!                                         [  0  R22 ]
!
!   P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX AND
!   R IS A m-BY-n UPPER TRIANGULAR OR TRAPEZOIDAL MATRIX.
!   
!   R11 IS DEFINED AS THE LARGEST LEADING SUBMATRIX OF R WHOSE ESTIMATED CONDITION
!   NUMBER  IS LESS THAN 1/tol IF tol IS IN ]0,1[ OR SUCH THAT ABS(R11[j,j])>0 IF
!   tol IS EQUAL TO 0 (SEE BELOW FOR DETAILS).
!   
!   THE ORDER OF R11 IS AN ESTIMATE OF THE RANK OF a AND IS STORED
!   IN THE ARGUMENT krank ON EXIT OF qr_cmp2.
!
!   IN OTHER WORDS, ON EXIT, krank CONTAINS THE EFFECTIVE RANK OF a. I.E., THE NUMBER
!   OF INDEPENDENT COLUMNS IN MATRIX a.
!
!   ON INPUT, IF krank=k, THIS IMPLIES THAT THE FIRST k COLUMNS OF a ARE TO BE FORCED
!   INTO THE BASIS. PIVOTING IS PERFORMED ON THE LAST n-k COLUMNS OF a.
!   
!   WHEN krank>=min(m,n) IS USED, PIVOTING IS NOT PERFORMED. THIS IS
!   APPROPRIATE WHEN a IS KNOWN TO BE FULL RANK. krank<=0 CAN BE USED
!   WHEN PIVOTING IS DESIRED ON ALL COLUMNS OF a.
!                      
!   IF tol IS PRESENT AND IS IN [0,1[, THEN :
!       ON ENTRY, CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a
!       ( IN THE 1-NORM) ARE PERFORMED. tol IS THEN USED TO DETERMINE THE EFFECTIVE RANK
!       OF a, WHICH IS DEFINED AS THE ORDER OF THE LARGEST LEADING
!       TRIANGULAR SUBMATRIX R11 IN THE QR FACTORIZATION WITH PIVOTING OF a,
!       WHOSE ESTIMATED CONDITION NUMBER IS LESS THAN 1/tol. IF tol=0 IS SPECIFIED
!       THE NUMERICAL RANK OF a IS DETERMINED.
!
!   IF tol IS NOT SPECIFIED OR IS OUTSIDE [0,1[ :
!       THE CALCULATIONS TO DETERMINE THE CONDITION NUMBER OF a ARE NOT
!       PERFORMED AND CRUDE TESTS ON R(j,j) ARE DONE TO DETERMINE
!       THE NUMERICAL RANK OF a.
!
!   FURTHERMORE, IF tol IS PRESENT AND IS IN [0,1[ IN THE CALL OF qr_cmp2, THE CONDITION
!   NUMBER OF a IS COMPUTED IN ALL CASES AND ON EXIT, THE RECIPROCAL OF THE CONDITION NUMBER
!   IS RETURNED IN tol.
!
!   IF IT IS POSSIBLE THAT a MAY NOT BE OF FULL RANK (I.E., CERTAIN COLUMNS OF a ARE
!   LINEAR COMBINATIONS OF OTHER COLUMNS), THEN THE LINEARLY DEPENDENT COLUMNS CAN
!   USUALLY BE DETERMINED BY USING tol=RELATIVE PRECISION OF THE ELEMENTS IN a.
!   IF EACH ELEMENT IS CORRECT TO, SAY, 5 DIGITS THEN tol=0.00001 SHOULD BE USED.
!   
!   IN A SECOND STEP, IF THE OPTIONAL PARAMETER tau IS PRESENT IN THE CALL OF qr_cmp2,
!   THEN R22 IS CONSIDERED TO BE NEGLIGIBLE AND THE SUBMATRIX R12 IS ANNIHILATED BY
!   ORTHOGONAL TRANSFORMATIONS FROM THE RIGHT, ARRIVING AT THE COMPLETE ORTHOGONAL
!   FACTORIZATION:
!  
!                     a * P ≈ Q * [ T11 0 ] * Z
!                                 [  0  0 ]
!   
!   WHERE P IS A n-BY-n PERMUTATION MATRIX, Q IS A m-BY-m ORTHOGONAL MATRIX,
!   T11 IS A krank-BY-krank UPPER TRIANGULAR MATRIX AND Z IS A n-BY-n
!   ORTHOGONAL MATRIX. P AND Q ARE ARE ALREADY COMPUTED IN THE QR FACTORIZATION
!   WITH COLUM PIVOTING.
!
!   ON EXIT, a HAS BEEN OVERWRITTEN BY DETAILS OF ITS QR OR COMPLETE 
!   ORTHOGONAL FACTORIZATION, DEPENDING IF ARGUMENT tau IS ABSENT OR PRESENT.
!    
!   IF THE OPTIONAL PARAMETER tau IS ABSENT, qr_cmp2 COMPUTES ONLY
!   A FULL QR FACTORIZATION WITH COLUMN PIVOTING OF a AND:
!
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!    
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY a CONTAIN THE
!     CORRESPONDING ELEMENTS OF THE TRIANGULAR OR TRAPEZOIDAL MATRIX R.
!     THE ELEMENTS OF THE DIAGONAL OF R ARE STORED IN THE ARRAY diagr(:mn). 
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE QR DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a (E.G., THE RANK OF R11).
!                      
!   ON THE OTHER HAND, IF THE OPTIONAL PARAMETER tau IS PRESENT, qr_cmp2 COMPUTES A
!   COMPLETE ORTHOGONAL FACTORIZATION FROM THE QR FACTORIZATION WITH COLUMN PIVOTING OF a
!   AND:
!                      
!   - THE ELEMENTS BELOW AND ON THE DIAGONAL OF THE ARRAY a(:m,:mn) AND THE ARRAY
!     beta(:mn) STORED Q IN FACTORED FORM.
!                      
!   - THE ELEMENTS ABOVE THE DIAGONAL OF THE ARRAY SECTION a(1:krank,1:krank)
!     CONTAIN THE CORRESPONDING ELEMENTS OF THE TRIANGULAR MATRIX T11. THE ELEMENTS OF
!     THE DIAGONAL OF T11 ARE STORED IN THE ARRAY SECTION diagr(1:krank).
!
!   - ip STORES THE PERMUTATION MATRIX P IN THE COMPLETE DECOMPOSITION OF a.
!     IF ip(j)=k, THEN THE jTH COLUMN OF a*P WAS THE kTH COLUMN OF a.
!     THE MATRIX P IS REPRESENTED IN THE ARRAY ip AS FOLLOWS:
!     IF ip(j) = i THEN THE jTH COLUMN OF P IS THE iTH CANONICAL UNIT VECTOR.
!
!   - THE ORTHOGONAL MATRIX Z IS STORED IN FACTORED FORM IN THE ARRAY SECTIONS
!     a(:krank,krank+1:n) AND tau(:krank).
!                      
!   - krank CONTAINS THE EFFECTIVE RANK OF a, I.E., THE ORDER OF
!     THE SUBMATRIX T11.
!                      
!   THIS COMPLETE ORTHOGONAL FACTORIZATION CAN THEN BE USED TO COMPUTE THE MINIMAL 2-NORM
!   SOLUTIONS FOR RANK DEFICIENT LINEAR LEAST SQUARES PROBLEMS WITH SUBROUTINE qr_solve2.
!                      
!
!   NEXT, COMPUTE THE SOLUTION VECTOR FOR LINEAR LEAST SQUARE SYSTEM
!
!                   a(:m,:n)*x(:n) ≈ b(:m) .
!
!   WITH SUBROUTINE qr_solve2 AND THE COMPLETE ORTHOGONAL DECOMPOSITION COMPUTED BY qr_cmp2.
!
    call qr_solve2( a(:m,:n), diagr(:mn), beta(:mn), ip(:n), krank, b(:m), x(:n),  &
                    tau=tau(:mn), rnorm=rnorm, comp_resid=comp_resid )
!
!   qr_solve2 COMPUTES SOLUTION(S) TO REAL LINEAR LEAST SQUARES PROBLEMS 
!   OF THE FORM:
!
!                       Minimize || b - a*x ||_2
!
!   USING A QR FACTORIZATION WITH COLUMNS PIVOTING OR A COMPLETE
!   ORTHOGONAL FACTORIZATION OF a COMPUTED BY qr_cmp2. a IS AN m-BY-n MATRIX
!   WHICH MAY BE RANK-DEFICIENT. m>=n OR n>m IS PERMITTED.
!
!   SEVERAL RIGHT HAND SIDE VECTORS b AND SOLUTION VECTORS x CAN BE
!   HANDLED IN A SINGLE CALL; THEY ARE STORED AS THE COLUMNS OF THE
!   m-BY-nrhs RIGHT HAND SIDE MATRIX b AND THE n-BY-nrhs SOLUTION
!   MATRIX x, RESPECTIVELY. IN OTHER WORDS, THE ARGUMENTS b AND x MAY
!   BE VECTORS OR MATRICES. b IS OVERWRITTEN BY qr_solve2.
!   
!   ON EXIT, IF THE OPTIONAL LOGICAL PARAMETER comp_resid IS PRESENT AND IS SET TO true,
!   THE RESIDUAL VECTOR OR MATRIX b - a*x is COMPUTED AND OVERWRITES b.
!
!   THE 2-NORM OF THE RESIDUAL VECTOR(S) FOR THE SOLUTION VECTOR(S) x, CAN ALSO BE COMPUTED
!   DIRECTLY BY SPECIFYING THE OPTIONAL PARAMETER rnorm IN THE CALL OF qr_solve2.
!
!   THE MINIMUN 2-NORM SOLUTION(S) ARE COMPUTED IF THE OPTIONAL ARRAY PARAMETER tau IS
!   PRESENT IN BOTH THE CALLS OF qr_cmp2 AND qr_solve2 SUBROUTINES. OTHERWISE, SOLUTION(S) ARE
!   COMPUTED SUCH THAT IF THE jTH COLUMN OF a IS OMITTED FROM THE BASIS, x[j] O
!   x[j,:nrhs] IS SET TO ZERO.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
!   CHECK IF THE DEPENDENT COLUMNS IN MATRIX a HAVE BEEN IDENTIFIED CORRECTLY.
!
    if ( m>=n .and. idep<n ) then
!
        do l = krank+1_i4b, n
!
            j = ip(l)
!
            if ( j==idep .or. j==1_i4b .or. j==n ) exit
!
        end do
!
        test_lin = l/=n+1_i4b
!
    else
!
        test_lin = true
!
    end if
!
    if ( do_test ) then
!
!       CHECK THAT THE RESIDUALS ARE ORTHOGONAL TO THE RANGE OF THE COEFFICIENT MATRIX a .
!
        res(:n) = matmul( b(:m), a2(:m,:n) )
!
        err1 = maxval( abs(res(:n)) )/anorm
!
!       CHECK THE NORM OF THE RESIDUAL VECTOR.
!
        err2 = abs( norm( b(:m) ) - rnorm )
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( a2, res )
!
    end if
!
!   PRINT THE RESULTS OF THE TESTS.
!
    if ( err<=eps .and. test_lin ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*)
    write (prtunit,*) 'Estimated rank of the coefficient matrix = ', krank
!
    if ( krank/=mn ) then
        write (prtunit,*) 'Indices of linearly dependent columns    = ', ip(krank+1:n)
    end if
    write (prtunit,*) '2-norm of residual vector ||a*x-b||      = ', rnorm
!
    write (prtunit,*)
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for solving a linear least squares problem with a ', &
       m, ' by ', n,' real coefficient matrix is ', elapsed_time, ' seconds'
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, b, x, diagr, beta, tau, ip )
!
!
! END OF PROGRAM ex3_qr_cmp2
! ==========================
!
end program ex3_qr_cmp2

ex3_svd_cmp.F90

program ex3_svd_cmp
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine SVD_CMP
!   in module SVD_Procedures for computing all singular values of a real m-by-n matrix
!   by a fast variant of the Golub-Reinsch bidiagonal reduction algorithm and, at the user
!   option, the bisection, dqds or QR method with implicit shift methods for computing the
!   singular values of the bidiagonal form of the real m-by-n matrix.
!
!
! LATEST REVISION : 08/02/2024
!
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, seven, c30, c1_e6, &
                         svd_cmp, random_seed_, random_number_, gen_random_mat,     &
                         singval_sort, merror, allocate_error
!   
#ifdef _F2003
    use ieee_arithmetic, only : ieee_support_datatype, ieee_is_normal
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, m AND n ARE THE DIMENSIONS OF THE GENERATED MATRIX,
! nsvd0 IS THE NUMERICAL RANK OF THE GENERATED MATRIX (WHICH MUST BE LESS OR EQUAL TO k)
! FOR CASES GREATER THAN 0.
!
    integer(i4b), parameter :: prtunit = 6, m=3000, n=3000, k=min(m,n), nsvd0=3000
!   
! conda IS THE CONDITION NUMBER OF THE GENERATED MATRIX IN CASES 5, 6 AND 7.
!   
    real(stnd), parameter  :: conda=c1_e6
!
    character(len=*), parameter :: name_proc='Example 3 of svd_cmp'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: tmp, tmp2, abs_err, rel_err, elapsed_time
    real(stnd), allocatable, dimension(:)   :: s, s0, scal
    real(stnd), allocatable, dimension(:,:) :: a
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: i, mat_type
!
    logical(lgl) :: failure, bisect, dqds, do_test
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE EXAMPLE.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SINGULAR VALUES OF A FULL SVD OF A REAL MATRIX USING A
!               TWO-SIDED BIDIAGONAL REDUCTION ALGORITHM AND
!               THE BIDIAGONAL QR IMPLICIT, DQDS OR BISECTION METHOD FOR
!               COMPUTING THE SINGULAR VALUES.
!
!   SPECIFY THE TYPE OF INPUT MATRIX:
!
!   mat_type < 1  -> RANDOM MATRIX FROM THE UNIFORM DISTRIBUTION
!   mat_type = 1  -> SLOW DECAY OF SINGULAR VALUES
!   mat_type = 2  -> FAST DECAY OF SINGULAR VALUES
!   mat_type = 3  -> S-SHAPED DECAY OF SINGULAR VALUES
!   mat_type = 4  -> VERY SLOW DECAY OF SINGULAR VALUES
!   mat_type = 5  -> STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda
!                    SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE
!   mat_type = 6  -> GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type = 7  -> ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda
!                    SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE
!   mat_type > 7  -> UNIFORM DISTRIBUTION OF SINGULAR VALUES
!
    mat_type = 7_i4b
!
!   SPECIFY IF BISECTION OR DQDS IS USED FOR COMPUTING THE SINGULAR VALUES
!   INSTEAD OF THE QR IMPLICIT METHOD. BISECTION IS USUALLY MORE ACCURATE
!   BUT SLOWER.
!
    bisect = false
!
    dqds = true
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   SELECT THE UNIFORM RANDOM GENERATOR TO BE USED.
!
    call random_seed_( alg=3 )
!
!   RESET THE SEEDS USED BY THE UNIFORM RANDOM GENERATOR.
!
    call random_seed_()
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( a(m,n), s(k), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A RANDOM UNIFORM MATRIX OR SINGULAR VALUES.
!
    select case( mat_type )
!
        case( :0_i4b )
!
!           RANDOM UNIFORM MATRIX.
!
            call random_number_( a )
!
        case( 1_i4b )
!
!           SLOW DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = one/( tmp*tmp )
!        
            end do
!
        case( 2_i4b )
!
!           FAST DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = exp( -tmp/seven )
!        
            end do
!
        case( 3_i4b )
!
!           S-SHAPED DECAY OF SINGULAR VALUES.
!
            do i = 1_i4b, nsvd0
!
                tmp = real( i, stnd )
!
                s(i) = 0.0001_stnd + one/( one + exp( tmp - c30 ) )
!        
            end do
!
        case( 4_i4b )
!
!           VERY SLOW DECAY OF SINGULAR VALUES.
!
            tmp = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = exp( -tmp2/tmp )
!        
            end do
!
        case( 5_i4b )
!
!           STRONGLY CLUSTERED SINGULAR VALUES AT 1 WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH FEW DEFLATIONS FOR LAPACK GESDD ROUTINE.
!
            s(:nsvd0-1_i4b) = one
            s(nsvd0) = one/conda
!
        case( 6_i4b )
!
!           GEOMETRIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH MODERATE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
!
            do i = 1_i4b, nsvd0
!
                tmp2 = real( i - 1_i4b, stnd )
!
                s(i) = conda**( -tmp2/tmp )
!        
            end do
!
        case( 7_i4b )
!
!           ARITHMETIC DISTRIBUTION OF SINGULAR VALUES WITH CONDITION NUMBER conda.
!           THIS IS A SPECTRUM WITH COMPLETE DEFLATION FOR LAPACK GESDD ROUTINE.
!
            tmp  = real( nsvd0 - 1_i4b, stnd )
            tmp2 = one - one/conda
!
            do i = 1_i4b, nsvd0
!
                s(i) = one - (real( i - 1_i4b, stnd )/tmp)*tmp2
!        
            end do
!
        case default
!
!           UNIFORM DISTRIBUTION OF SINGULAR VALUES.
!
            call random_number_( s(:nsvd0) )
!
    end select
!
    if ( mat_type>0_i4b ) then
!
#ifdef _F2003
        if ( ieee_support_datatype( s ) ) then
!
            if ( .not.all( ieee_is_normal( s(:nsvd0) ) ) ) then
!
                call merror( name_proc//' : Exceptions occurred when generating the input matrix !'  )                    
!
            end if
!
        end if
#endif
!
!       SORT THE SINGULAR VALUES BY DECREASING MAGNITUDE.
!
        call singval_sort( 'D', s(:nsvd0) )
!
!       GENERATE A m-BY-n RANDOM REAL MATRIX a WITH A SPECIFIED DISTRIBUTION
!       OF SINGULAR VALUES AND A RANK EQUAL TO nsvd0.
!
        call gen_random_mat( s(:nsvd0), a )
!
    end if
!
    if ( do_test .and. mat_type>0_i4b ) then
!
!       ALLOCATE WORK ARRAY.
!
        allocate( s0(k), scal(k), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
!       SAVE THE TRUE SINGULAR VALUES.
!
        s0(:nsvd0) = s(:nsvd0)
        s0(nsvd0+1_i4b:k) = zero
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   svd_cmp COMPUTES THE SINGULAR VALUE DECOMPOSITION (SVD) OF A REAL
!   m-BY-n MATRIX a. THE SVD IS WRITTEN
!
!                       a = U * S * V**(t)
!
!   WHERE s IS AN m-BY-n MATRIX WHICH IS ZERO EXCEPT FOR ITS
!   min(m,n) DIAGONAL ELEMENTS, u IS AN m-BY-m ORTHOGONAL MATRIX, AND
!   v IS AN n-BY-n ORTHOGONAL MATRIX.  THE DIAGONAL ELEMENTS OF s
!   ARE THE SINGULAR VALUES OF a; THEY ARE REAL AND NON-NEGATIVE.
!   THE COLUMNS OF U AND V ARE THE LEFT AND RIGHT SINGULAR VECTORS OF a.
!
!   HERE, svd_cmp IS USED TO COMPUTE ONLY THE SINGULAR VALUES OF THE INPUT MATRIX.
!
    call svd_cmp( a, s, failure, sort=sort, bisect=bisect, dqds=dqds )
!
!   THE ROUTINE RETURNS THE SINGULAR VALUES IN ARGUMENT s .
!
!   ON EXIT OF svd_cmp :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE BIDIAGONAL
!                         SVD OF AN INTERMEDIATE BIDIAGONAL FORM B OF a .
!
!       s IS OVERWRITTEN WITH THE SINGULAR VALUES OF a.
!
!   IF SORT = 'a' OR 'A', THE SINGULAR VALUES ARE SORTED INTO ASCENDING ORDER.
!   IF SORT = 'd' OR 'D', THE SINGULAR VALUES ARE SORTED INTO DESCENDING ORDER.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!                
    if ( do_test .and. mat_type>0_i4b ) then
!
!       COMPUTE ABSOLUTE ERRORS OF SINGULAR VALUES.
!
        abs_err = maxval( abs( s(:k) - s0(:k) ) )
!
!       COMPUTE RELATIVE ERRORS OF SINGULAR VALUES.
!
        where( s0(:k)/=zero )
            scal(:k) = s0(:k)
        elsewhere
            scal(:k) = one
        end where
!
        rel_err = maxval( abs( (s(:k) - s0(:k))/scal(:k) ) )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( s0, scal )
!
    end if
!
!   DEALLOCATE WORK ARRAYS.
!
    deallocate( a, s )
!
!   CHECK THE RESULTS FOR SMALL RESIDUALS.
!
    if ( .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix type = ', mat_type
!
    write (prtunit,*) 
    write (prtunit,*) ' Matrix_type < 1  -> random matrix from the uniform distribution'
    write (prtunit,*) ' Matrix type = 1  -> slow decay of singular values'
    write (prtunit,*) ' Matrix type = 2  -> fast decay of singular values'
    write (prtunit,*) ' Matrix type = 3  -> s-shaped decay of singular values'
    write (prtunit,*) ' Matrix type = 4  -> very slow decay of singular values'
    write (prtunit,*) ' Matrix type = 5  -> strongly clustered singular values at 1'
    write (prtunit,*) '                     spectrum with few deflations (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 6  -> geometric distribution of singular values'
    write (prtunit,*) '                     spectrum with moderate deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type = 7  -> arithmetic distribution of singular values'
    write (prtunit,*) '                     spectrum with complete deflation (for LAPACK GESDD routine)'
    write (prtunit,*) ' Matrix type > 7  -> uniform distribution of singular values'
!
    if ( do_test .and. mat_type>0_i4b  ) then
!
        write (prtunit,*) 
!
        write (prtunit,*) 'Absolute accuracy of the computed singular values    = ', abs_err
        write (prtunit,*) 'Relative accuracy of the computed singular values    = ', rel_err
!
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE    ( from svd_cmp() ) = ', failure
!
    write (prtunit,*) 
    write (*,'(a,i5,a,i5,a,0pd12.4,a)')    &
      'The elapsed time for computing all singular values of a ', &
       m, ' by ', n,' real matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex3_svd_cmp
! ==========================
!
end program ex3_svd_cmp

ex3_trid_deflate.F90

program ex3_trid_deflate
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_DEFLATE
!   in module Eig_Procedures for computing selected eigenvectors of a real
!   symmetric tridiagonal matrix by a (Godunov) deflation method.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine SYMTRID_BISECT in module Eig_Procedures
!    for computing all or selected eigenvalues of a real symmetric tridiagonal matrix by a
!    bisection method.
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, one, two, c50, safmin, unit_matrix,  &
                         trid_deflate, symtrid_bisect, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED TRIDIAGONAL MATRIX,
! neig IS THE NUMBER OF EIGENVALUES AND EIGENVECTORS, WHICH MUST BE COMPUTED.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=3000
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 3 of trid_deflate'
!   
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, abstol, &
                                               normr, normt, elapsed_time
    real(stnd), allocatable, dimension(:)   :: d, e, eigval, temp, temp2
    real(stnd), allocatable, dimension(:,:) :: resid, eigvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: neig2, j, max_qr_steps
!
    logical(lgl) :: failure, failure2, do_test, ortho
!   
    character    :: sort='d'
!   
!
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SELECTED EIGENVALUES AND EIGENVECTORS OF
!               A REAL SYMMETRIC TRIDIAGONAL MATRIX USING
!               BISECTION AND DEFLATION METHODS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   DETERMINE PARAMETERS OF THE DEFLATION ALGORITHM.
!
!   OPTIONAL PARAMETER ortho DETERMINES IF DEFLATION IS APPLIED SEQUENTIALLY TO ALL
!   COMPUTED EIGENVECTORS (ortho=true) OR ONLY IN EACH CLUSTER OF EIGENVALUES
!   (ortho=false). IN THE LATTER CASE, ORTHOGONALITY OF THE EIGENVECTORS MAY BE
!   SLIGHTLY DEGRADED, BUT COMPUTATIONS WILL BE MUCH FASTER.
!
    ortho = false
!
!   OPTIONAL PARAMETER max_qr_steps CONTROLS THE MAXIMUM NUMBER OF QR SWEEPS FOR
!   DEFLATING A TRIDIAGONAL MATRIX FOR A GIVEN EIGENVALUE IN THE DEFLATION ALGORITHM.
!   THE ALGORITHM FAILS TO CONVERGE IF THE TOTAL NUMBER OF QR SWEEPS FOR ALL REQUESTED
!   EIGENVALUES EXCEEDS max_qr_steps * nvec.
!
    max_qr_steps = 4_i4b
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( d(n), e(n), eigval(n), eigvec(n,neig), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN d .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN e .
!
!    d(:n) = two
!    e(:n) = one
!
!    d(:n) = 0.5_stnd
!    e(:n) = 0.5_stnd
!
!   GENERATE A RANDOM TRIDIAGONAL MATRIX.
!
    call random_number( d(:n) )
    call random_number( e(:n) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resid(n,neig), temp(n), temp2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE THE FIRST neig EIGENVALUES OF THE TRIDIAGONAL MATRIX TO HIGH ACCURACY BY BISECTION
!   WITH SUBROUTINE symtrid_bisect.
!
    call symtrid_bisect( d, e, neig2, eigval, failure, sort=sort, le=neig, abstol=abstol )
!
!   ON EXIT OF symtrid_bisect:
!
!       failure= false :  INDICATES SUCCESSFUL EXIT.
!       failure= true  :  INDICATES THAT THE ALGORITHM DID NOT CONVERGE AND
!                         THAT FULL ACCURACY WAS NOT ATTAINED IN THE EIGENDECOMPOSITION
!                         OF THE TRIDIAGONAL MATRIX.
!
!       eigval IS OVERWRITTEN WITH THE EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!       IF SORT = 'a' OR 'A', THE EIGENVALUES ARE SORTED INTO ASCENDING ORDER.
!       IF SORT = 'd' OR 'D', THE EIGENVALUES ARE SORTED INTO DESCENDING ORDER.
!
!   COMPUTE THE FIRST neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY A
!   DEFLATION TECHNIQUE WITH SUBROUTINE trid_deflate.
!
!   ON ENTRY, PARAMETER eigval CONTAINS SELECTED EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_deflate( d(:n), e(:n), eigval(:neig), eigvec(:n,:neig), failure2,   &
                       ortho=ortho, max_qr_steps=max_qr_steps )
!
!   ON EXIT OF trid_deflate :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE neig EIGENVECTORS ASSOCIATED WITH THE EIGENVALUES eigval(:neig).
!
!   trid_deflate MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL AND IF THESE EIGENVALUES ARE NOT COMPUTED WITH HIGH ACCURACY.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX.
!
        do j=1_i4b, neig
!
            temp(1_i4b:n) = eigvec(1_i4b:n,j)
!
            temp2(1_i4b)         = d(1_i4b)*temp(1_i4b) + e(1_i4b)*temp(2_i4b)
            temp2(2_i4b:n-1_i4b) = e(1_i4b:n-2_i4b)*temp(1_i4b:n-2_i4b)           +   &
                                   d(2_i4b:n-1_i4b)*temp(2_i4b:n-1_i4b)           +   &
                                   e(2_i4b:n-1_i4b)*temp(3_i4b:n)
            temp2(n)             = e(n-1_i4b)*temp(n-1_i4b) + d(n)*temp(n)
!
            resid(1_i4b:n,j) = temp2(1_i4b:n) - eigval(j)*temp(1_i4b:n)
!
        end do
!
        temp(:neig) = norm( resid(:n,:neig), dim=2_i4b )
!
        normr = maxval( temp(:neig) )
        normt = sqrt( sum( d(1_i4b:n)**2 ) + sum( e(1_i4b:n-1_i4b)**2 ) )
!
        err1  = normr/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX a.
!
        call unit_matrix( resid(:neig,:neig) )
!
        resid(:neig,:neig) = abs( resid(:neig,:neig) - matmul( transpose( eigvec ), eigvec ) )
!
        normr = maxval( resid(:neig,:neig) )
!
        err2  = normr/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( d, e, eigval, eigvec, resid, temp, temp2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( d, e, eigval, eigvec )
!
    end if
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_deflate()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig,' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex3_trid_deflate
! ===============================
!
end program ex3_trid_deflate

ex3_trid_inviter.F90

program ex3_trid_inviter
!
!
! Purpose
! =======
!
!   This program is intended to demonstrate the use of subroutine TRID_INVITER
!   in module EIG_Procedures for computing selected eigenvectors of a real
!   symmetric tridiagonal matrix by an inverse iteration method.
!
!
! Further Details
! ===============
!
!    The program also shows the use of subroutine SYMTRID_BISECT in module EIG_Procedures
!    for computing all or selected eigenvalues of a real symmetric tridiagonal matrix by a
!    bisection method.
!
! LATEST REVISION : 12/12/2023
!                                                                              
! ================================================================================================
!
!
! USED MODULES 
! ============
!
    use Statpack, only : lgl, i4b, stnd, true, false, zero, half, one, two, c50, safmin,       &
                         trid_inviter, symtrid_bisect, unit_matrix, norm, merror, allocate_error
#ifdef _MATMUL
    use Statpack, only : matmul=>matmul2
#endif
#ifdef _TRANSPOSE
    use Statpack, only : transpose=>transpose2
#endif
!   
!
! STRONG TYPING IMPOSED 
! =====================
!    
    implicit none
!   
!   
! PARAMETERS 
! ==========
!
! prtunit IS THE PRINTING UNIT, n IS THE DIMENSION OF THE GENERATED TRIDIAGONAL MATRIX,
! neig IS THE NUMBER OF EIGENVALUES AND EIGENVECTORS, WHICH MUST BE COMPUTED,
! maxiter IS THE NUMBER OF INVERSE ITERATIONS PERFORMED TO COMPUTE EIGENVECTORS.
!
    integer(i4b), parameter :: prtunit=6, n=3000, neig=3000, maxiter=2
!   
! fudge IS A FUDGING FACTOR FOR THE ACCURACY TESTS.
!   
    real(stnd), parameter  :: fudge=c50
!
    character(len=*), parameter :: name_proc='Example 3 of trid_inviter'
!
!
! SPECIFICATIONS FOR VARIABLES
! ============================
!
    real(stnd)                              :: err1, err2, err, eps, abstol, &
                                               normr, normt, elapsed_time
    real(stnd), allocatable, dimension(:)   :: diag, sup, sup2, eigval
    real(stnd), allocatable, dimension(:,:) :: resid, eigvec
!
    integer      :: iok, istart, iend, irate, imax, itime
    integer(i4b) :: neig2
!
    logical(lgl) :: failure, failure2, do_test
!   
    character    :: sort='d'
!   
!   
! EXECUTABLE STATEMENTS
! =====================
!
!   PRINT LABEL OF THE TEST.
!
    write (prtunit,*)
    write (prtunit,*) name_proc,' :'
    write (prtunit,*) repeat('*', len(name_proc) )
    write (prtunit,*)
!
!   EXAMPLE 3 : SELECTED EIGENVALUES AND EIGENVECTORS OF
!               A REAL SYMMETRIC TRIDIAGONAL MATRIX USING
!               BISECTION FOR THE EIGENVALUES AND THE INVERSE
!               ITERATION METHOD FOR THE EIGENVECTORS.
!
!   SET THE REQUIRED PRECISION OF THE RESULTS.
!
    eps = fudge*epsilon( err )
    err = zero
!
!   SPECIFY IF NUMERICAL TESTS MUST BE PERFORMED.
!
    do_test = true
!
!   DETERMINE PARAMETERS OF THE BISECTION ALGORITHM.
!
!   THE EIGENVALUES ARE COMPUTED WITH MAXIMUM ACCURACY WHEN OPTIONAL PARAMETER abstol IS SET TO
!   sqrt( safmin ), WHICH IS THE SQUARE ROOT OF THE UNDERFLOW THRESHOLD.
!
    abstol = sqrt( safmin )
!
!   ALLOCATE WORK ARRAYS.
!
    allocate( diag(n), sup(n), eigval(n), eigvec(n,neig), stat=iok )
!
    if ( iok/=0 ) then
        call merror( name_proc//allocate_error )
    end if
!
!   GENERATE A TRIDIAGONAL MATRIX.
!   THE DIAGONAL ELEMENTS ARE STORED IN diag .
!   THE OFF-DIAGONAL ELEMENTS ARE STORED IN sup .
!
!    diag(:n) = two
!    sup(:n)  = one
!
!    diag(:n) = 0.5_stnd
!    sup(:n)  = 0.5_stnd
!
!   GENERATE A RANDOM TRIDIAGONAL MATRIX.
!
    call random_number( diag(:n) )
    call random_number( sup(:n) )
!
    if ( do_test ) then
!
!       ALLOCATE WORK ARRAYS.
!
        allocate( resid(n,neig), sup2(n), stat=iok )
!
        if ( iok/=0 ) then
            call merror( name_proc//allocate_error )
        end if
!
    end if
!
!   START TIMING THE COMPUTATIONS.
!
    call system_clock( count_rate=irate, count_max=imax  )
    call system_clock( count=istart )
!
!   COMPUTE SELECTED EIGENVALUES OF THE TRIDIAGONAL MATRIX BY BISECTION.
!
    call symtrid_bisect( diag, sup, neig2, eigval, failure, sort=sort, le=neig, abstol=abstol )
!
!   COMPUTE THE FIRST neig EIGENVECTORS OF THE TRIDIAGONAL MATRIX BY maxiter INVERSE ITERATIONS
!   WITH SUBROUTINE trid_inviter.
!
!   ON ENTRY, PARAMETER eigval CONTAINS SELECTED EIGENVALUES OF THE TRIDIAGONAL MATRIX.
!   THE EIGENVALUES VALUES MUST BE GIVEN IN DECREASING ORDER.
!
    call trid_inviter( diag(:n), sup(:n), eigval(:neig), eigvec(:n,:neig), failure2, maxiter=maxiter )
!
!   ON EXIT OF trid_inviter :
!
!       failure= false :  INDICATES SUCCESSFUL EXIT
!       failure= true  :  INDICATES THAT MAXIMUM ACCURACY WAS NOT OBTAIN IN THE ALGORITHM

!       eigvec CONTAINS THE neig EIGENVECTORS ASSOCIATED WITH THE EIGENVALUES eigval(:neig).
!
!   trid_inviter MAY FAIL IF SOME THE EIGENVALUES SPECIFIED IN PARAMETER eigval ARE NEARLY
!   IDENTICAL.
!
!   STOP THE TIMER.
!
    call system_clock( count=iend )
!
    itime = iend - istart
    if ( iend<istart ) then
        itime = itime + imax   
    end if
!
    elapsed_time = real( itime, stnd )/real( irate, stnd )
!
    if ( do_test ) then
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION a*u - u*s,
!       WHERE s ARE THE EIGENVALUES AND u THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX .
!
        sup2(:n) = eoshift( sup(:n), -1 )
        resid(:n,:neig) = spread( diag(:n),         dim=2, ncopies=neig )*eigvec                                + &
                          spread( sup2(:n),         dim=2, ncopies=neig )*eoshift( eigvec, shift=-1, dim=1 )    + &
                          eoshift( spread(sup2(:n), dim=2, ncopies=neig)*eigvec, shift=1 )                      - &
                          spread( eigval(:neig),    dim=1, ncopies=n )*eigvec
!
        sup(:neig) = norm( resid(:n,:neig), dim=2_i4b )
!
        normr = maxval( sup(:neig) )
        normt = sqrt( sum( diag(1_i4b:n)**2 ) + sum( sup(1_i4b:n-1_i4b)**2 ) )
!
        err1  = normr/( normt*real(n,stnd) )
!
!       CHECK FOR SMALL RESIDUALS OF THE EXPRESSION I - u**(t)*u
!       WHERE u are THE EIGENVECTORS OF THE TRIDIAGONAL MATRIX .
!
        call unit_matrix( resid(:neig,:neig) )
!
        resid(:neig,:neig) = abs( resid(:neig,:neig) - matmul( transpose( eigvec ), eigvec ) )
!
        normr = maxval( resid(:neig,:neig) )
!
        err2  = normr/real(n,stnd)
!
        err = max( err1, err2 )
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( diag, sup, eigval, eigvec, resid, sup2 )
!
    else
!
!       DEALLOCATE WORK ARRAYS.
!
        deallocate( diag, sup, eigval, eigvec )
!
    end if
!
    if ( err<=eps .and. .not.failure ) then
        write (prtunit,*) name_proc//' is correct'
    else
        write (prtunit,*) name_proc//' is incorrect'
    end if
!
    write (prtunit,*) 
    write (prtunit,*) ' FAILURE ( from symtrid_bisect() ) = ', failure
    write (prtunit,*) ' FAILURE ( from trid_inviter()   ) = ', failure2
!
    if ( do_test ) then
        write (prtunit,*) 
        write (prtunit,*) 'Accuracy of the computed eigenvalues and eigenvectors = ', err1
        write (prtunit,*) 'Orthogonality of the computed eigenvectors            = ', err2
    end if
!
    write (prtunit,*) 
    write (*,'(a,i6,a,i6,a,i6,a,0pd12.4,a)')    &
      'The elapsed time for computing ', neig,' eigenvalues and eigenvectors of a ', &
       n, ' by ', n,' real symmetric tridiagonal matrix is ', elapsed_time, ' seconds'
!
!
! END OF PROGRAM ex3_trid_inviter
! ===============================
!
end program ex3_trid_inviter
Flag Counter