mirror of
				https://github.com/IcedRooibos/py32f0-template.git
				synced 2025-10-31 01:42:04 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			892 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			892 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* ----------------------------------------------------------------------
 | |
|  * Project:      CMSIS DSP Library
 | |
|  * Title:        arm_mat_inverse_f16.c
 | |
|  * Description:  Floating-point matrix inverse
 | |
|  *
 | |
|  * $Date:        23 April 2021
 | |
|  * $Revision:    V1.9.0
 | |
|  *
 | |
|  * Target Processor: Cortex-M and Cortex-A cores
 | |
|  * -------------------------------------------------------------------- */
 | |
| /*
 | |
|  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 | |
|  *
 | |
|  * SPDX-License-Identifier: Apache-2.0
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the License); you may
 | |
|  * not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  * www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 | |
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| #include "dsp/matrix_functions_f16.h"
 | |
| 
 | |
| #if defined(ARM_FLOAT16_SUPPORTED)
 | |
| 
 | |
| 
 | |
| /**
 | |
|   @ingroup groupMatrix
 | |
|  */
 | |
| 
 | |
| 
 | |
| /**
 | |
|   @addtogroup MatrixInv
 | |
|   @{
 | |
|  */
 | |
| 
 | |
| /**
 | |
|   @brief         Floating-point matrix inverse.
 | |
|   @param[in]     pSrc      points to input matrix structure. The source matrix is modified by the function.
 | |
|   @param[out]    pDst      points to output matrix structure
 | |
|   @return        execution status
 | |
|                    - \ref ARM_MATH_SUCCESS       : Operation successful
 | |
|                    - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
 | |
|                    - \ref ARM_MATH_SINGULAR      : Input matrix is found to be singular (non-invertible)
 | |
|  */
 | |
| #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
 | |
| 
 | |
| arm_status arm_mat_inverse_f16(
 | |
|   const arm_matrix_instance_f16 * pSrc,
 | |
|   arm_matrix_instance_f16 * pDst)
 | |
| {
 | |
|     float16_t *pIn = pSrc->pData;   /* input data matrix pointer */
 | |
|     float16_t *pOut = pDst->pData;  /* output data matrix pointer */
 | |
|     float16_t *pInT1, *pInT2;   /* Temporary input data matrix pointer */
 | |
|     float16_t *pOutT1, *pOutT2; /* Temporary output data matrix pointer */
 | |
|     float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;    /* Temporary input and output data matrix pointer */
 | |
| 
 | |
|     uint32_t  numRows = pSrc->numRows;  /* Number of rows in the matrix  */
 | |
|     uint32_t  numCols = pSrc->numCols;  /* Number of Cols in the matrix  */
 | |
|     float16_t *pTmpA, *pTmpB;
 | |
| 
 | |
|     _Float16 in = 0.0f16;        /* Temporary input values  */
 | |
|     uint32_t  i, rowCnt, flag = 0U, j, loopCnt, l;   /* loop counters */
 | |
|     arm_status status;          /* status of matrix inverse */
 | |
|     uint32_t  blkCnt;
 | |
| 
 | |
| #ifdef ARM_MATH_MATRIX_CHECK
 | |
|    /* Check for matrix mismatch condition */
 | |
|   if ((pSrc->numRows != pSrc->numCols) || (pDst->numRows != pDst->numCols)
 | |
|      || (pSrc->numRows != pDst->numRows))
 | |
|   {
 | |
|     /* Set status as ARM_MATH_SIZE_MISMATCH */
 | |
|     status = ARM_MATH_SIZE_MISMATCH;
 | |
|   }
 | |
|   else
 | |
| #endif /*    #ifdef ARM_MATH_MATRIX_CHECK    */
 | |
|   {
 | |
| 
 | |
|     /*--------------------------------------------------------------------------------------------------------------
 | |
|      * Matrix Inverse can be solved using elementary row operations.
 | |
|      *
 | |
|      *  Gauss-Jordan Method:
 | |
|      *
 | |
|      *     1. First combine the identity matrix and the input matrix separated by a bar to form an
 | |
|      *        augmented matrix as follows:
 | |
|      *                      _  _          _     _      _   _         _         _
 | |
|      *                     |  |  a11  a12  | | | 1   0  |   |       |  X11 X12  |
 | |
|      *                     |  |            | | |        |   |   =   |           |
 | |
|      *                     |_ |_ a21  a22 _| | |_0   1 _|  _|       |_ X21 X21 _|
 | |
|      *
 | |
|      *      2. In our implementation, pDst Matrix is used as identity matrix.
 | |
|      *
 | |
|      *      3. Begin with the first row. Let i = 1.
 | |
|      *
 | |
|      *      4. Check to see if the pivot for row i is zero.
 | |
|      *         The pivot is the element of the main diagonal that is on the current row.
 | |
|      *         For instance, if working with row i, then the pivot element is aii.
 | |
|      *         If the pivot is zero, exchange that row with a row below it that does not
 | |
|      *         contain a zero in column i. If this is not possible, then an inverse
 | |
|      *         to that matrix does not exist.
 | |
|      *
 | |
|      *      5. Divide every element of row i by the pivot.
 | |
|      *
 | |
|      *      6. For every row below and  row i, replace that row with the sum of that row and
 | |
|      *         a multiple of row i so that each new element in column i below row i is zero.
 | |
|      *
 | |
|      *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
 | |
|      *         for every element below and above the main diagonal.
 | |
|      *
 | |
|      *      8. Now an identical matrix is formed to the left of the bar(input matrix, src).
 | |
|      *         Therefore, the matrix to the right of the bar is our solution(dst matrix, dst).
 | |
|      *----------------------------------------------------------------------------------------------------------------*/
 | |
| 
 | |
|         /*
 | |
|          * Working pointer for destination matrix
 | |
|          */
 | |
|         pOutT1 = pOut;
 | |
|         /*
 | |
|          * Loop over the number of rows
 | |
|          */
 | |
|         rowCnt = numRows;
 | |
|         /*
 | |
|          * Making the destination matrix as identity matrix
 | |
|          */
 | |
|         while (rowCnt > 0U)
 | |
|         {
 | |
|             /*
 | |
|              * Writing all zeroes in lower triangle of the destination matrix
 | |
|              */
 | |
|             j = numRows - rowCnt;
 | |
|             while (j > 0U)
 | |
|             {
 | |
|                 *pOutT1++ = 0.0f16;
 | |
|                 j--;
 | |
|             }
 | |
|             /*
 | |
|              * Writing all ones in the diagonal of the destination matrix
 | |
|              */
 | |
|             *pOutT1++ = 1.0f16;
 | |
|             /*
 | |
|              * Writing all zeroes in upper triangle of the destination matrix
 | |
|              */
 | |
|             j = rowCnt - 1U;
 | |
|             while (j > 0U)
 | |
|             {
 | |
|                 *pOutT1++ = 0.0f16;
 | |
|                 j--;
 | |
|             }
 | |
|             /*
 | |
|              * Decrement the loop counter
 | |
|              */
 | |
|             rowCnt--;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * Loop over the number of columns of the input matrix.
 | |
|          * All the elements in each column are processed by the row operations
 | |
|          */
 | |
|         loopCnt = numCols;
 | |
|         /*
 | |
|          * Index modifier to navigate through the columns
 | |
|          */
 | |
|         l = 0U;
 | |
|         while (loopCnt > 0U)
 | |
|         {
 | |
|             /*
 | |
|              * Check if the pivot element is zero..
 | |
|              * If it is zero then interchange the row with non zero row below.
 | |
|              * If there is no non zero element to replace in the rows below,
 | |
|              * then the matrix is Singular.
 | |
|              */
 | |
| 
 | |
|             /*
 | |
|              * Working pointer for the input matrix that points
 | |
|              * * to the pivot element of the particular row
 | |
|              */
 | |
|             pInT1 = pIn + (l * numCols);
 | |
|             /*
 | |
|              * Working pointer for the destination matrix that points
 | |
|              * * to the pivot element of the particular row
 | |
|              */
 | |
|             pOutT1 = pOut + (l * numCols);
 | |
|             /*
 | |
|              * Temporary variable to hold the pivot value
 | |
|              */
 | |
|             in = *pInT1;
 | |
|             
 | |
| 
 | |
|             /*
 | |
|              * Check if the pivot element is zero
 | |
|              */
 | |
|             if ((_Float16)*pInT1 == 0.0f16)
 | |
|             {
 | |
|                 /*
 | |
|                  * Loop over the number rows present below
 | |
|                  */
 | |
|                 for (i = 1U; i < numRows-l; i++)
 | |
|                 {
 | |
|                     /*
 | |
|                      * Update the input and destination pointers
 | |
|                      */
 | |
|                     pInT2 = pInT1 + (numCols * i);
 | |
|                     pOutT2 = pOutT1 + (numCols * i);
 | |
|                     /*
 | |
|                      * Check if there is a non zero pivot element to
 | |
|                      * * replace in the rows below
 | |
|                      */
 | |
|                     if ((_Float16)*pInT2 != 0.0f16)
 | |
|                     {
 | |
|                         f16x8_t vecA, vecB;
 | |
|                         /*
 | |
|                          * Loop over number of columns
 | |
|                          * * to the right of the pilot element
 | |
|                          */
 | |
|                         pTmpA = pInT1;
 | |
|                         pTmpB = pInT2;
 | |
|                         blkCnt = (numCols - l) >> 3;
 | |
|                         while (blkCnt > 0U)
 | |
|                         {
 | |
|                             
 | |
|                             vecA = vldrhq_f16(pTmpA);
 | |
|                             vecB = vldrhq_f16(pTmpB);
 | |
|                             vstrhq_f16(pTmpB, vecA);
 | |
|                             vstrhq_f16(pTmpA, vecB);
 | |
| 
 | |
|                             pTmpA += 8;
 | |
|                             pTmpB += 8;
 | |
|                             /*
 | |
|                              * Decrement the blockSize loop counter
 | |
|                              */
 | |
|                             blkCnt--;
 | |
|                         }
 | |
|                         /*
 | |
|                          * tail
 | |
|                          * (will be merged thru tail predication)
 | |
|                          */
 | |
|                         blkCnt = (numCols - l) & 7;
 | |
|                         if (blkCnt > 0U)
 | |
|                         {
 | |
|                             mve_pred16_t p0 = vctp16q(blkCnt);
 | |
| 
 | |
|                             vecA = vldrhq_f16(pTmpA);
 | |
|                             vecB = vldrhq_f16(pTmpB);
 | |
|                             vstrhq_p_f16(pTmpB, vecA, p0);
 | |
|                             vstrhq_p_f16(pTmpA, vecB, p0);
 | |
|                         }
 | |
| 
 | |
|                         pInT1 += numCols - l;
 | |
|                         pInT2 += numCols - l;
 | |
|                         pTmpA = pOutT1;
 | |
|                         pTmpB = pOutT2;
 | |
|                         blkCnt = numCols >> 3;
 | |
|                         while (blkCnt > 0U)
 | |
|                         {
 | |
| 
 | |
|                             vecA = vldrhq_f16(pTmpA);
 | |
|                             vecB = vldrhq_f16(pTmpB);
 | |
|                             vstrhq_f16(pTmpB, vecA);
 | |
|                             vstrhq_f16(pTmpA, vecB);
 | |
|                             pTmpA += 8;
 | |
|                             pTmpB += 8;
 | |
|                             /*
 | |
|                              * Decrement the blockSize loop counter
 | |
|                              */
 | |
|                             blkCnt--;
 | |
|                         }
 | |
|                         /*
 | |
|                          * tail
 | |
|                          */
 | |
|                         blkCnt = numCols & 7;
 | |
|                         if (blkCnt > 0U)
 | |
|                         {
 | |
|                             mve_pred16_t p0 = vctp16q(blkCnt);
 | |
| 
 | |
|                             vecA = vldrhq_f16(pTmpA);
 | |
|                             vecB = vldrhq_f16(pTmpB);
 | |
|                             vstrhq_p_f16(pTmpB, vecA, p0);
 | |
|                             vstrhq_p_f16(pTmpA, vecB, p0);
 | |
|                         }
 | |
| 
 | |
|                         pOutT1 += numCols;
 | |
|                         pOutT2 += numCols;
 | |
|                         /*
 | |
|                          * Flag to indicate whether exchange is done or not
 | |
|                          */
 | |
|                         flag = 1U;
 | |
| 
 | |
|                         /*
 | |
|                          * Break after exchange is done
 | |
|                          */
 | |
|                         break;
 | |
|                     }
 | |
|               
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             /*
 | |
|              * Update the status if the matrix is singular
 | |
|              */
 | |
|             if ((flag != 1U) && (in == 0.0f16))
 | |
|             {
 | |
|                 return ARM_MATH_SINGULAR;
 | |
|             }
 | |
| 
 | |
|             /*
 | |
|              * Points to the pivot row of input and destination matrices
 | |
|              */
 | |
|             pPivotRowIn = pIn + (l * numCols);
 | |
|             pPivotRowDst = pOut + (l * numCols);
 | |
| 
 | |
|             /*
 | |
|              * Temporary pointers to the pivot row pointers
 | |
|              */
 | |
|             pInT1 = pPivotRowIn;
 | |
|             pOutT1 = pPivotRowDst;
 | |
| 
 | |
|             /*
 | |
|              * Pivot element of the row
 | |
|              */
 | |
|             in = *(pIn + (l * numCols));
 | |
| 
 | |
|             pTmpA = pInT1;
 | |
| 
 | |
|             f16x8_t invIn = vdupq_n_f16(1.0f16 / in);
 | |
| 
 | |
|             blkCnt = (numCols - l) >> 3;
 | |
|             f16x8_t vecA;
 | |
|             while (blkCnt > 0U)
 | |
|             {
 | |
|                 *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA * invIn;
 | |
|                 pTmpA += 8;
 | |
|                 /*
 | |
|                  * Decrement the blockSize loop counter
 | |
|                  */
 | |
|                 blkCnt--;
 | |
|             }
 | |
|             /*
 | |
|              * tail
 | |
|              */
 | |
|             blkCnt = (numCols - l) & 7;
 | |
|             if (blkCnt > 0U)
 | |
|             {
 | |
|                 mve_pred16_t p0 = vctp16q(blkCnt);
 | |
|                 
 | |
| 
 | |
|                 vecA = vldrhq_f16(pTmpA);
 | |
|                 vecA = vecA * invIn;
 | |
|                 vstrhq_p_f16(pTmpA, vecA, p0);
 | |
|             }
 | |
| 
 | |
|             pInT1 += numCols - l;
 | |
|             /*
 | |
|              * Loop over number of columns
 | |
|              * * to the right of the pilot element
 | |
|              */
 | |
| 
 | |
|             pTmpA = pOutT1;
 | |
|             blkCnt = numCols >> 3;
 | |
|             while (blkCnt > 0U)
 | |
|             {
 | |
|                 *(f16x8_t *) pTmpA = *(f16x8_t *) pTmpA *invIn;
 | |
|                 pTmpA += 8;
 | |
|                 /*
 | |
|                  * Decrement the blockSize loop counter
 | |
|                  */
 | |
|                 blkCnt--;
 | |
|             }
 | |
|             /*
 | |
|              * tail
 | |
|              * (will be merged thru tail predication)
 | |
|              */
 | |
|             blkCnt = numCols & 7;
 | |
|             if (blkCnt > 0U)
 | |
|             {
 | |
|                 mve_pred16_t p0 = vctp16q(blkCnt);
 | |
| 
 | |
|                 vecA = vldrhq_f16(pTmpA);
 | |
|                 vecA = vecA * invIn;
 | |
|                 vstrhq_p_f16(pTmpA, vecA, p0);
 | |
|             }
 | |
| 
 | |
|             pOutT1 += numCols;
 | |
| 
 | |
|             /*
 | |
|              * Replace the rows with the sum of that row and a multiple of row i
 | |
|              * * so that each new element in column i above row i is zero.
 | |
|              */
 | |
| 
 | |
|             /*
 | |
|              * Temporary pointers for input and destination matrices
 | |
|              */
 | |
|             pInT1 = pIn;
 | |
|             pOutT1 = pOut;
 | |
| 
 | |
|             for (i = 0U; i < numRows; i++)
 | |
|             {
 | |
|                 /*
 | |
|                  * Check for the pivot element
 | |
|                  */
 | |
|                 if (i == l)
 | |
|                 {
 | |
|                     /*
 | |
|                      * If the processing element is the pivot element,
 | |
|                      * only the columns to the right are to be processed
 | |
|                      */
 | |
|                     pInT1 += numCols - l;
 | |
|                     pOutT1 += numCols;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     /*
 | |
|                      * Element of the reference row
 | |
|                      */
 | |
| 
 | |
|                     /*
 | |
|                      * Working pointers for input and destination pivot rows
 | |
|                      */
 | |
|                     pPRT_in = pPivotRowIn;
 | |
|                     pPRT_pDst = pPivotRowDst;
 | |
|                     /*
 | |
|                      * Loop over the number of columns to the right of the pivot element,
 | |
|                      * to replace the elements in the input matrix
 | |
|                      */
 | |
| 
 | |
|                     in = *pInT1;
 | |
|                     f16x8_t tmpV = vdupq_n_f16(in);
 | |
| 
 | |
|                     blkCnt = (numCols - l) >> 3;
 | |
|                     while (blkCnt > 0U)
 | |
|                     {
 | |
|                         f16x8_t vec1, vec2;
 | |
|                         /*
 | |
|                          * Replace the element by the sum of that row
 | |
|                          * and a multiple of the reference row
 | |
|                          */
 | |
|                         vec1 = vldrhq_f16(pInT1);
 | |
|                         vec2 = vldrhq_f16(pPRT_in);
 | |
|                         vec1 = vfmsq_f16(vec1, tmpV, vec2);
 | |
|                         vstrhq_f16(pInT1, vec1);
 | |
|                         pPRT_in += 8;
 | |
|                         pInT1 += 8;
 | |
|                         /*
 | |
|                          * Decrement the blockSize loop counter
 | |
|                          */
 | |
|                         blkCnt--;
 | |
|                     }
 | |
|                     /*
 | |
|                      * tail
 | |
|                      * (will be merged thru tail predication)
 | |
|                      */
 | |
|                     blkCnt = (numCols - l) & 7;
 | |
|                     if (blkCnt > 0U)
 | |
|                     {
 | |
|                         f16x8_t vec1, vec2;
 | |
|                         mve_pred16_t p0 = vctp16q(blkCnt);
 | |
| 
 | |
|                         vec1 = vldrhq_f16(pInT1);
 | |
|                         vec2 = vldrhq_f16(pPRT_in);
 | |
|                         vec1 = vfmsq_f16(vec1, tmpV, vec2);
 | |
|                         vstrhq_p_f16(pInT1, vec1, p0);
 | |
|                         pInT1 += blkCnt;
 | |
|                     }
 | |
| 
 | |
|                     blkCnt = numCols >> 3;
 | |
|                     while (blkCnt > 0U)
 | |
|                     {
 | |
|                         f16x8_t vec1, vec2;
 | |
| 
 | |
|                         /*
 | |
|                          * Replace the element by the sum of that row
 | |
|                          * and a multiple of the reference row
 | |
|                          */
 | |
|                         vec1 = vldrhq_f16(pOutT1);
 | |
|                         vec2 = vldrhq_f16(pPRT_pDst);
 | |
|                         vec1 = vfmsq_f16(vec1, tmpV, vec2);
 | |
|                         vstrhq_f16(pOutT1, vec1);
 | |
|                         pPRT_pDst += 8;
 | |
|                         pOutT1 += 8;
 | |
|                         /*
 | |
|                          * Decrement the blockSize loop counter
 | |
|                          */
 | |
|                         blkCnt--;
 | |
|                     }
 | |
|                     /*
 | |
|                      * tail
 | |
|                      * (will be merged thru tail predication)
 | |
|                      */
 | |
|                     blkCnt = numCols & 7;
 | |
|                     if (blkCnt > 0U)
 | |
|                     {
 | |
|                         f16x8_t vec1, vec2;
 | |
|                         mve_pred16_t p0 = vctp16q(blkCnt);
 | |
| 
 | |
|                         vec1 = vldrhq_f16(pOutT1);
 | |
|                         vec2 = vldrhq_f16(pPRT_pDst);
 | |
|                         vec1 = vfmsq_f16(vec1, tmpV, vec2);
 | |
|                         vstrhq_p_f16(pOutT1, vec1, p0);
 | |
| 
 | |
|                         pInT2 += blkCnt;
 | |
|                         pOutT1 += blkCnt;
 | |
|                     }
 | |
|                 }
 | |
|                 /*
 | |
|                  * Increment the temporary input pointer
 | |
|                  */
 | |
|                 pInT1 = pInT1 + l;
 | |
|             }
 | |
|             /*
 | |
|              * Increment the input pointer
 | |
|              */
 | |
|             pIn++;
 | |
|             /*
 | |
|              * Decrement the loop counter
 | |
|              */
 | |
|             loopCnt--;
 | |
|             /*
 | |
|              * Increment the index modifier
 | |
|              */
 | |
|             l++;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|          * Set status as ARM_MATH_SUCCESS
 | |
|          */
 | |
|         status = ARM_MATH_SUCCESS;
 | |
| 
 | |
|         if ((flag != 1U) && (in == 0.0f16))
 | |
|         {
 | |
|             pIn = pSrc->pData;
 | |
|             for (i = 0; i < numRows * numCols; i++)
 | |
|             {
 | |
|                 if ((_Float16)pIn[i] != 0.0f16)
 | |
|                     break;
 | |
|             }
 | |
| 
 | |
|             if (i == numRows * numCols)
 | |
|                 status = ARM_MATH_SINGULAR;
 | |
|         }
 | |
|   }
 | |
|   /* Return to application */
 | |
|   return (status);
 | |
| }
 | |
| 
 | |
| #else
 | |
| 
 | |
| arm_status arm_mat_inverse_f16(
 | |
|   const arm_matrix_instance_f16 * pSrc,
 | |
|         arm_matrix_instance_f16 * pDst)
 | |
| {
 | |
|   float16_t *pIn = pSrc->pData;                  /* input data matrix pointer */
 | |
|   float16_t *pOut = pDst->pData;                 /* output data matrix pointer */
 | |
|   float16_t *pInT1, *pInT2;                      /* Temporary input data matrix pointer */
 | |
|   float16_t *pOutT1, *pOutT2;                    /* Temporary output data matrix pointer */
 | |
|   float16_t *pPivotRowIn, *pPRT_in, *pPivotRowDst, *pPRT_pDst;  /* Temporary input and output data matrix pointer */
 | |
|   uint32_t numRows = pSrc->numRows;              /* Number of rows in the matrix  */
 | |
|   uint32_t numCols = pSrc->numCols;              /* Number of Cols in the matrix  */
 | |
| 
 | |
|   _Float16 Xchg, in = 0.0f16, in1;                /* Temporary input values  */
 | |
|   uint32_t i, rowCnt, flag = 0U, j, loopCnt, k,l;      /* loop counters */
 | |
|   arm_status status;                             /* status of matrix inverse */
 | |
| 
 | |
| #ifdef ARM_MATH_MATRIX_CHECK
 | |
| 
 | |
|   /* Check for matrix mismatch condition */
 | |
|   if ((pSrc->numRows != pSrc->numCols) ||
 | |
|       (pDst->numRows != pDst->numCols) ||
 | |
|       (pSrc->numRows != pDst->numRows)   )
 | |
|   {
 | |
|     /* Set status as ARM_MATH_SIZE_MISMATCH */
 | |
|     status = ARM_MATH_SIZE_MISMATCH;
 | |
|   }
 | |
|   else
 | |
| 
 | |
| #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
 | |
| 
 | |
|   {
 | |
| 
 | |
|     /*--------------------------------------------------------------------------------------------------------------
 | |
|      * Matrix Inverse can be solved using elementary row operations.
 | |
|      *
 | |
|      *  Gauss-Jordan Method:
 | |
|      *
 | |
|      *      1. First combine the identity matrix and the input matrix separated by a bar to form an
 | |
|      *        augmented matrix as follows:
 | |
|      *                      _                  _         _         _
 | |
|      *                     |  a11  a12 | 1   0  |       |  X11 X12  |
 | |
|      *                     |           |        |   =   |           |
 | |
|      *                     |_ a21  a22 | 0   1 _|       |_ X21 X21 _|
 | |
|      *
 | |
|      *      2. In our implementation, pDst Matrix is used as identity matrix.
 | |
|      *
 | |
|      *      3. Begin with the first row. Let i = 1.
 | |
|      *
 | |
|      *      4. Check to see if the pivot for row i is zero.
 | |
|      *         The pivot is the element of the main diagonal that is on the current row.
 | |
|      *         For instance, if working with row i, then the pivot element is aii.
 | |
|      *         If the pivot is zero, exchange that row with a row below it that does not
 | |
|      *         contain a zero in column i. If this is not possible, then an inverse
 | |
|      *         to that matrix does not exist.
 | |
|      *
 | |
|      *      5. Divide every element of row i by the pivot.
 | |
|      *
 | |
|      *      6. For every row below and  row i, replace that row with the sum of that row and
 | |
|      *         a multiple of row i so that each new element in column i below row i is zero.
 | |
|      *
 | |
|      *      7. Move to the next row and column and repeat steps 2 through 5 until you have zeros
 | |
|      *         for every element below and above the main diagonal.
 | |
|      *
 | |
|      *      8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc).
 | |
|      *         Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst).
 | |
|      *----------------------------------------------------------------------------------------------------------------*/
 | |
| 
 | |
|     /* Working pointer for destination matrix */
 | |
|     pOutT1 = pOut;
 | |
| 
 | |
|     /* Loop over the number of rows */
 | |
|     rowCnt = numRows;
 | |
| 
 | |
|     /* Making the destination matrix as identity matrix */
 | |
|     while (rowCnt > 0U)
 | |
|     {
 | |
|       /* Writing all zeroes in lower triangle of the destination matrix */
 | |
|       j = numRows - rowCnt;
 | |
|       while (j > 0U)
 | |
|       {
 | |
|         *pOutT1++ = 0.0f16;
 | |
|         j--;
 | |
|       }
 | |
| 
 | |
|       /* Writing all ones in the diagonal of the destination matrix */
 | |
|       *pOutT1++ = 1.0f16;
 | |
| 
 | |
|       /* Writing all zeroes in upper triangle of the destination matrix */
 | |
|       j = rowCnt - 1U;
 | |
|       while (j > 0U)
 | |
|       {
 | |
|         *pOutT1++ = 0.0f16;
 | |
|         j--;
 | |
|       }
 | |
| 
 | |
|       /* Decrement loop counter */
 | |
|       rowCnt--;
 | |
|     }
 | |
| 
 | |
|     /* Loop over the number of columns of the input matrix.
 | |
|        All the elements in each column are processed by the row operations */
 | |
|     loopCnt = numCols;
 | |
| 
 | |
|     /* Index modifier to navigate through the columns */
 | |
|     l = 0U;
 | |
| 
 | |
|     while (loopCnt > 0U)
 | |
|     {
 | |
|       /* Check if the pivot element is zero..
 | |
|        * If it is zero then interchange the row with non zero row below.
 | |
|        * If there is no non zero element to replace in the rows below,
 | |
|        * then the matrix is Singular. */
 | |
| 
 | |
|       /* Working pointer for the input matrix that points
 | |
|        * to the pivot element of the particular row  */
 | |
|       pInT1 = pIn + (l * numCols);
 | |
| 
 | |
|       /* Working pointer for the destination matrix that points
 | |
|        * to the pivot element of the particular row  */
 | |
|       pOutT1 = pOut + (l * numCols);
 | |
| 
 | |
|       /* Temporary variable to hold the pivot value */
 | |
|       in = *pInT1;
 | |
| 
 | |
| 
 | |
|       /* Check if the pivot element is zero */
 | |
|       if ((_Float16)*pInT1 == 0.0f16)
 | |
|       {
 | |
|         /* Loop over the number rows present below */
 | |
| 
 | |
|         for (i = 1U; i < numRows-l; i++)
 | |
|         {
 | |
|           /* Update the input and destination pointers */
 | |
|           pInT2 = pInT1 + (numCols * i);
 | |
|           pOutT2 = pOutT1 + (numCols * i);
 | |
| 
 | |
|           /* Check if there is a non zero pivot element to
 | |
|            * replace in the rows below */
 | |
|           if ((_Float16)*pInT2 != 0.0f16)
 | |
|           {
 | |
|             /* Loop over number of columns
 | |
|              * to the right of the pilot element */
 | |
|             j = numCols - l;
 | |
| 
 | |
|             while (j > 0U)
 | |
|             {
 | |
|               /* Exchange the row elements of the input matrix */
 | |
|               Xchg = *pInT2;
 | |
|               *pInT2++ = *pInT1;
 | |
|               *pInT1++ = Xchg;
 | |
| 
 | |
|               /* Decrement the loop counter */
 | |
|               j--;
 | |
|             }
 | |
| 
 | |
|             /* Loop over number of columns of the destination matrix */
 | |
|             j = numCols;
 | |
| 
 | |
|             while (j > 0U)
 | |
|             {
 | |
|               /* Exchange the row elements of the destination matrix */
 | |
|               Xchg = *pOutT2;
 | |
|               *pOutT2++ = *pOutT1;
 | |
|               *pOutT1++ = Xchg;
 | |
| 
 | |
|               /* Decrement loop counter */
 | |
|               j--;
 | |
|             }
 | |
| 
 | |
|             /* Flag to indicate whether exchange is done or not */
 | |
|             flag = 1U;
 | |
| 
 | |
|             /* Break after exchange is done */
 | |
|             break;
 | |
|           }
 | |
| 
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       /* Update the status if the matrix is singular */
 | |
|       if ((flag != 1U) && (in == 0.0f16))
 | |
|       {
 | |
|         return ARM_MATH_SINGULAR;
 | |
|       }
 | |
| 
 | |
|       /* Points to the pivot row of input and destination matrices */
 | |
|       pPivotRowIn = pIn + (l * numCols);
 | |
|       pPivotRowDst = pOut + (l * numCols);
 | |
| 
 | |
|       /* Temporary pointers to the pivot row pointers */
 | |
|       pInT1 = pPivotRowIn;
 | |
|       pInT2 = pPivotRowDst;
 | |
| 
 | |
|       /* Pivot element of the row */
 | |
|       in = *pPivotRowIn;
 | |
| 
 | |
|       /* Loop over number of columns
 | |
|        * to the right of the pilot element */
 | |
|       j = (numCols - l);
 | |
| 
 | |
|       while (j > 0U)
 | |
|       {
 | |
|         /* Divide each element of the row of the input matrix
 | |
|          * by the pivot element */
 | |
|         in1 = *pInT1;
 | |
|         *pInT1++ = in1 / in;
 | |
| 
 | |
|         /* Decrement the loop counter */
 | |
|         j--;
 | |
|       }
 | |
| 
 | |
|       /* Loop over number of columns of the destination matrix */
 | |
|       j = numCols;
 | |
| 
 | |
|       while (j > 0U)
 | |
|       {
 | |
|         /* Divide each element of the row of the destination matrix
 | |
|          * by the pivot element */
 | |
|         in1 = *pInT2;
 | |
|         *pInT2++ = in1 / in;
 | |
| 
 | |
|         /* Decrement the loop counter */
 | |
|         j--;
 | |
|       }
 | |
| 
 | |
|       /* Replace the rows with the sum of that row and a multiple of row i
 | |
|        * so that each new element in column i above row i is zero.*/
 | |
| 
 | |
|       /* Temporary pointers for input and destination matrices */
 | |
|       pInT1 = pIn;
 | |
|       pInT2 = pOut;
 | |
| 
 | |
|       /* index used to check for pivot element */
 | |
|       i = 0U;
 | |
| 
 | |
|       /* Loop over number of rows */
 | |
|       /*  to be replaced by the sum of that row and a multiple of row i */
 | |
|       k = numRows;
 | |
| 
 | |
|       while (k > 0U)
 | |
|       {
 | |
|         /* Check for the pivot element */
 | |
|         if (i == l)
 | |
|         {
 | |
|           /* If the processing element is the pivot element,
 | |
|              only the columns to the right are to be processed */
 | |
|           pInT1 += numCols - l;
 | |
| 
 | |
|           pInT2 += numCols;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           /* Element of the reference row */
 | |
|           in = *pInT1;
 | |
| 
 | |
|           /* Working pointers for input and destination pivot rows */
 | |
|           pPRT_in = pPivotRowIn;
 | |
|           pPRT_pDst = pPivotRowDst;
 | |
| 
 | |
|           /* Loop over the number of columns to the right of the pivot element,
 | |
|              to replace the elements in the input matrix */
 | |
|           j = (numCols - l);
 | |
| 
 | |
|           while (j > 0U)
 | |
|           {
 | |
|             /* Replace the element by the sum of that row
 | |
|                and a multiple of the reference row  */
 | |
|             in1 = *pInT1;
 | |
|             *pInT1++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_in++);
 | |
| 
 | |
|             /* Decrement the loop counter */
 | |
|             j--;
 | |
|           }
 | |
| 
 | |
|           /* Loop over the number of columns to
 | |
|              replace the elements in the destination matrix */
 | |
|           j = numCols;
 | |
| 
 | |
|           while (j > 0U)
 | |
|           {
 | |
|             /* Replace the element by the sum of that row
 | |
|                and a multiple of the reference row  */
 | |
|             in1 = *pInT2;
 | |
|             *pInT2++ = (_Float16)in1 - ((_Float16)in * (_Float16)*pPRT_pDst++);
 | |
| 
 | |
|             /* Decrement loop counter */
 | |
|             j--;
 | |
|           }
 | |
| 
 | |
|         }
 | |
| 
 | |
|         /* Increment temporary input pointer */
 | |
|         pInT1 = pInT1 + l;
 | |
| 
 | |
|         /* Decrement loop counter */
 | |
|         k--;
 | |
| 
 | |
|         /* Increment pivot index */
 | |
|         i++;
 | |
|       }
 | |
| 
 | |
|       /* Increment the input pointer */
 | |
|       pIn++;
 | |
| 
 | |
|       /* Decrement the loop counter */
 | |
|       loopCnt--;
 | |
| 
 | |
|       /* Increment the index modifier */
 | |
|       l++;
 | |
|     }
 | |
| 
 | |
|     /* Set status as ARM_MATH_SUCCESS */
 | |
|     status = ARM_MATH_SUCCESS;
 | |
| 
 | |
|     if ((flag != 1U) && ((_Float16)in == 0.0f16))
 | |
|     {
 | |
|       pIn = pSrc->pData;
 | |
|       for (i = 0; i < numRows * numCols; i++)
 | |
|       {
 | |
|         if ((_Float16)pIn[i] != 0.0f16)
 | |
|             break;
 | |
|       }
 | |
| 
 | |
|       if (i == numRows * numCols)
 | |
|         status = ARM_MATH_SINGULAR;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   /* Return to application */
 | |
|   return (status);
 | |
| }
 | |
| #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
 | |
| 
 | |
| /**
 | |
|   @} end of MatrixInv group
 | |
|  */
 | |
| 
 | |
| #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
 | |
| 
 | 
