mirror of
				https://github.com/IcedRooibos/py32f0-template.git
				synced 2025-10-31 09:52:05 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			552 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			552 lines
		
	
	
		
			9.7 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| 
 | |
| /* ----------------------------------------------------------------------
 | |
|  * Project:      CMSIS DSP Library
 | |
|  * Title:        arm_boolean_distance.c
 | |
|  * Description:  Templates for boolean distances
 | |
|  *
 | |
|  * $Date:        23 April 2021
 | |
|  * $Revision:    V1.9.0
 | |
|  *
 | |
|  * Target Processor: Cortex-M and Cortex-A cores
 | |
|  * -------------------------------------------------------------------- */
 | |
| /*
 | |
|  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 | |
|  *
 | |
|  * SPDX-License-Identifier: Apache-2.0
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the License); you may
 | |
|  * not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  * www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 | |
|  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * @defgroup DISTANCEF Distance Functions
 | |
|  *
 | |
|  * Computes distances between vectors.
 | |
|  *
 | |
|  * Distance functions are useful in a lot of algorithms.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * @addtogroup DISTANCEF
 | |
|  * @{
 | |
|  */
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
/*
 * Name builder for the function generated from this template.
 *
 * The pasting is done in two steps on purpose: FUNC() forwards its argument
 * to _FUNC(), so an argument that is itself a macro (such as EXT) is expanded
 * first and only then appended to the arm_boolean_distance prefix.
 */
#define _FUNC(PREFIX,SUFFIX) PREFIX##SUFFIX

#define FUNC(SUFFIX) _FUNC(arm_boolean_distance, SUFFIX)
 | |
| 
 | |
| /**
 | |
|  * @brief        Elements of boolean distances
 | |
|  *
 | |
|  * Different values which are used to compute boolean distances
 | |
|  *
 | |
|  * @param[in]    pA              First vector of packed booleans
 | |
|  * @param[in]    pB              Second vector of packed booleans
 | |
|  * @param[in]    numberOfBools   Number of booleans
 | |
|  * @return None
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
 | |
| 
 | |
| #include "arm_common_tables.h"
 | |
| 
 | |
| void FUNC(EXT)(const uint32_t *pA
 | |
|        , const uint32_t *pB
 | |
|        , uint32_t numberOfBools
 | |
| #ifdef TT
 | |
|        , uint32_t *cTT
 | |
| #endif
 | |
| #ifdef FF
 | |
|        , uint32_t *cFF
 | |
| #endif
 | |
| #ifdef TF
 | |
|        , uint32_t *cTF
 | |
| #endif
 | |
| #ifdef FT
 | |
|        , uint32_t *cFT
 | |
| #endif
 | |
|        )
 | |
| {
 | |
| 
 | |
| #ifdef TT
 | |
|     uint32_t _ctt=0;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     uint32_t _cff=0;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     uint32_t _ctf=0;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     uint32_t _cft=0;
 | |
| #endif
 | |
|     uint32_t        a, b, ba, bb;
 | |
|     int shift;
 | |
|     const uint8_t  *pA8 = (const uint8_t *) pA;
 | |
|     const uint8_t  *pB8 = (const uint8_t *) pB;
 | |
| 
 | |
|     /* handle vector blocks */
 | |
|     uint32_t         blkCnt = numberOfBools / 128;
 | |
| 
 | |
| 
 | |
| 
 | |
|     while (blkCnt > 0U) {
 | |
|         uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
 | |
|         uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
 | |
| 
 | |
| #ifdef TT
 | |
|         uint8x16_t      vecTT = vecA & vecB;
 | |
|         vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
 | |
|         _ctt += vaddvq(vecTT);
 | |
| #endif
 | |
| #ifdef FF
 | |
|         uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
 | |
|         vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
 | |
|         _cff += vaddvq(vecFF);
 | |
| #endif
 | |
| #ifdef TF
 | |
|         uint8x16_t      vecTF = vecA & vmvnq(vecB);
 | |
|         vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
 | |
|         _ctf += vaddvq(vecTF);
 | |
| #endif
 | |
| #ifdef FT
 | |
|         uint8x16_t      vecFT = vmvnq(vecA) & vecB;
 | |
|         vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
 | |
|         _cft += vaddvq(vecFT);
 | |
| #endif
 | |
| 
 | |
|         pA8 += 16;
 | |
|         pB8 += 16;
 | |
|         blkCnt--;
 | |
| 
 | |
|     }
 | |
| 
 | |
|     pA = (const uint32_t *)pA8;
 | |
|     pB = (const uint32_t *)pB8;
 | |
| 
 | |
|     blkCnt = numberOfBools & 0x7F;
 | |
|     while(blkCnt >= 32)
 | |
|     {
 | |
|        a = *pA++;
 | |
|        b = *pB++;
 | |
|        shift = 0;
 | |
|        while(shift < 32)
 | |
|        {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
|           b = b >> 1;
 | |
| 
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           shift ++;
 | |
|        }
 | |
| 
 | |
|        blkCnt -= 32;
 | |
|     }
 | |
| 
 | |
|     a = *pA++;
 | |
|     b = *pB++;
 | |
| 
 | |
|     a = a >> (32 - blkCnt);
 | |
|     b = b >> (32 - blkCnt);
 | |
| 
 | |
|     while(blkCnt > 0)
 | |
|     {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
| 
 | |
|           b = b >> 1;
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           blkCnt --;
 | |
|     }
 | |
| 
 | |
| #ifdef TT
 | |
|     *cTT = _ctt;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     *cFF = _cff;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     *cTF = _ctf;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     *cFT = _cft;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| #else
 | |
| #if defined(ARM_MATH_NEON)
 | |
| 
 | |
| 
 | |
| void FUNC(EXT)(const uint32_t *pA
 | |
|        , const uint32_t *pB
 | |
|        , uint32_t numberOfBools
 | |
| #ifdef TT
 | |
|        , uint32_t *cTT
 | |
| #endif
 | |
| #ifdef FF
 | |
|        , uint32_t *cFF
 | |
| #endif
 | |
| #ifdef TF
 | |
|        , uint32_t *cTF
 | |
| #endif
 | |
| #ifdef FT
 | |
|        , uint32_t *cFT
 | |
| #endif
 | |
|        )
 | |
| {
 | |
| #ifdef TT
 | |
|     uint32_t _ctt=0;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     uint32_t _cff=0;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     uint32_t _ctf=0;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     uint32_t _cft=0;
 | |
| #endif
 | |
|     uint32_t nbBoolBlock;
 | |
|     uint32_t a,b,ba,bb;
 | |
|     int shift;
 | |
|     uint32x4_t aV, bV;
 | |
| #ifdef TT
 | |
|     uint32x4_t cttV;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     uint32x4_t cffV;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     uint32x4_t ctfV;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     uint32x4_t cftV;
 | |
| #endif
 | |
|     uint8x16_t tmp;
 | |
|     uint16x8_t tmp2;
 | |
|     uint32x4_t tmp3;
 | |
|     uint64x2_t tmp4;
 | |
| #ifdef TT
 | |
|     uint64x2_t tmp4tt;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     uint64x2_t tmp4ff;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     uint64x2_t tmp4tf;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     uint64x2_t tmp4ft;
 | |
| #endif
 | |
| 
 | |
| #ifdef TT
 | |
|     tmp4tt = vdupq_n_u64(0);
 | |
| #endif
 | |
| #ifdef FF
 | |
|     tmp4ff = vdupq_n_u64(0);
 | |
| #endif
 | |
| #ifdef TF
 | |
|     tmp4tf = vdupq_n_u64(0);
 | |
| #endif
 | |
| #ifdef FT
 | |
|     tmp4ft = vdupq_n_u64(0);
 | |
| #endif
 | |
| 
 | |
|     nbBoolBlock = numberOfBools >> 7;
 | |
|     while(nbBoolBlock > 0)
 | |
|     {
 | |
|        aV = vld1q_u32(pA);
 | |
|        bV = vld1q_u32(pB);
 | |
|        pA += 4;
 | |
|        pB += 4;
 | |
| 
 | |
| #ifdef TT
 | |
|        cttV = vandq_u32(aV,bV);
 | |
| #endif
 | |
| #ifdef FF
 | |
|        cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
 | |
| #endif
 | |
| #ifdef TF
 | |
|        ctfV = vandq_u32(aV,vmvnq_u32(bV));
 | |
| #endif
 | |
| #ifdef FT
 | |
|        cftV = vandq_u32(vmvnq_u32(aV),bV);
 | |
| #endif
 | |
| 
 | |
| #ifdef TT
 | |
|        tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
 | |
|        tmp2 = vpaddlq_u8(tmp);
 | |
|        tmp3 = vpaddlq_u16(tmp2);
 | |
|        tmp4 = vpaddlq_u32(tmp3);
 | |
|        tmp4tt = vaddq_u64(tmp4tt, tmp4);
 | |
| #endif
 | |
| 
 | |
| #ifdef FF
 | |
|        tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
 | |
|        tmp2 = vpaddlq_u8(tmp);
 | |
|        tmp3 = vpaddlq_u16(tmp2);
 | |
|        tmp4 = vpaddlq_u32(tmp3);
 | |
|        tmp4ff = vaddq_u64(tmp4ff, tmp4);
 | |
| #endif
 | |
| 
 | |
| #ifdef TF
 | |
|        tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
 | |
|        tmp2 = vpaddlq_u8(tmp);
 | |
|        tmp3 = vpaddlq_u16(tmp2);
 | |
|        tmp4 = vpaddlq_u32(tmp3);
 | |
|        tmp4tf = vaddq_u64(tmp4tf, tmp4);
 | |
| #endif 
 | |
| 
 | |
| #ifdef FT
 | |
|        tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
 | |
|        tmp2 = vpaddlq_u8(tmp);
 | |
|        tmp3 = vpaddlq_u16(tmp2);
 | |
|        tmp4 = vpaddlq_u32(tmp3);
 | |
|        tmp4ft = vaddq_u64(tmp4ft, tmp4);
 | |
| #endif
 | |
| 
 | |
| 
 | |
|        nbBoolBlock --;
 | |
|     }
 | |
| 
 | |
| #ifdef TT
 | |
|     _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
 | |
| #endif
 | |
| #ifdef FF
 | |
|     _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
 | |
| #endif
 | |
| #ifdef TF
 | |
|     _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
 | |
| #endif
 | |
| #ifdef FT
 | |
|     _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
 | |
| #endif
 | |
| 
 | |
|     nbBoolBlock = numberOfBools & 0x7F;
 | |
|     while(nbBoolBlock >= 32)
 | |
|     {
 | |
|        a = *pA++;
 | |
|        b = *pB++;
 | |
|        shift = 0;
 | |
|        while(shift < 32)
 | |
|        {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
|           b = b >> 1;
 | |
| 
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           shift ++;
 | |
|        }
 | |
| 
 | |
|        nbBoolBlock -= 32;
 | |
|     }
 | |
| 
 | |
|     a = *pA++;
 | |
|     b = *pB++;
 | |
| 
 | |
|     a = a >> (32 - nbBoolBlock);
 | |
|     b = b >> (32 - nbBoolBlock);
 | |
| 
 | |
|     while(nbBoolBlock > 0)
 | |
|     {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
| 
 | |
|           b = b >> 1;
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           nbBoolBlock --;
 | |
|     }
 | |
| 
 | |
| #ifdef TT
 | |
|     *cTT = _ctt;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     *cFF = _cff;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     *cTF = _ctf;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     *cFT = _cft;
 | |
| #endif
 | |
| }
 | |
| 
 | |
| #else
 | |
| 
 | |
| void FUNC(EXT)(const uint32_t *pA
 | |
|        , const uint32_t *pB
 | |
|        , uint32_t numberOfBools
 | |
| #ifdef TT
 | |
|        , uint32_t *cTT
 | |
| #endif
 | |
| #ifdef FF
 | |
|        , uint32_t *cFF
 | |
| #endif
 | |
| #ifdef TF
 | |
|        , uint32_t *cTF
 | |
| #endif
 | |
| #ifdef FT
 | |
|        , uint32_t *cFT
 | |
| #endif
 | |
|        )
 | |
| {
 | |
|   
 | |
| #ifdef TT
 | |
|     uint32_t _ctt=0;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     uint32_t _cff=0;
 | |
| #endif
 | |
| #ifdef TF
 | |
|     uint32_t _ctf=0;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     uint32_t _cft=0;
 | |
| #endif
 | |
|     uint32_t a,b,ba,bb;
 | |
|     int shift;
 | |
| 
 | |
|     while(numberOfBools >= 32)
 | |
|     {
 | |
|        a = *pA++;
 | |
|        b = *pB++;
 | |
|        shift = 0;
 | |
|        while(shift < 32)
 | |
|        {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
|           b = b >> 1;
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           shift ++;
 | |
|        }
 | |
| 
 | |
|        numberOfBools -= 32;
 | |
|     }
 | |
| 
 | |
|     a = *pA++;
 | |
|     b = *pB++;
 | |
| 
 | |
|     a = a >> (32 - numberOfBools);
 | |
|     b = b >> (32 - numberOfBools);
 | |
| 
 | |
|     while(numberOfBools > 0)
 | |
|     {
 | |
|           ba = a & 1;
 | |
|           bb = b & 1;
 | |
|           a = a >> 1;
 | |
|           b = b >> 1;
 | |
| 
 | |
| #ifdef TT
 | |
|           _ctt += (ba && bb);
 | |
| #endif
 | |
| #ifdef FF
 | |
|           _cff += ((1 ^ ba) && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef TF
 | |
|           _ctf += (ba && (1 ^ bb));
 | |
| #endif
 | |
| #ifdef FT
 | |
|           _cft += ((1 ^ ba) && bb);
 | |
| #endif
 | |
|           numberOfBools --;
 | |
|     }
 | |
| 
 | |
| #ifdef TT
 | |
|     *cTT = _ctt;
 | |
| #endif
 | |
| #ifdef FF
 | |
|     *cFF = _cff;
 | |
| #endif
 | |
| #ifdef TF 
 | |
|     *cTF = _ctf;
 | |
| #endif
 | |
| #ifdef FT
 | |
|     *cFT = _cft;
 | |
| #endif
 | |
| }
 | |
| #endif
 | |
| #endif /* defined(ARM_MATH_MVEI) */
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * @} end of DISTANCEF group
 | |
|  */
 | 
