1.求絕對值函數
(1) _abs()
C代碼 : int _abs(int src)
彙編: ABS
功能: 求32位數據的絕對值
(2) _labs()
C代碼: int _labs(long src)
彙編: ABS
功能: 求40位數據的絕對值
(3) _abs2()
C代碼: int _abs2(int src)
彙編: ABS2
功能: 同時求高16位和低16位的絕對值,即
return[31:16] = |src[31:16]|
return[15: 0] = |src[15: 0]|
2.運算指令
(1) _add2()
C代碼: int _add2(int src1,int src2)
彙編: ADD2
功能: 同時進行src1,src2的高16位和src1,src2的低16位相加,忽略任何進位,即
return[31:16] = src1[31:16] + src2[31:16]
return[15: 0] = src1[15: 0] + src2[15: 0]
(2) _sadd()
C代碼: int _sadd(int src1,int src2)
彙編: SADD
功能: 帶飽和的32位有符號加法,即return = sat(src1 + src2),溢出時結果被限值
(3) _lsadd()
C代碼: long _lsadd(int src1,long src2)
彙編: SADD
功能: 32位數據加上40位數據,返回爲40位數據
(4) _add4()
C代碼: int _add4(int src1,int src2)
彙編: ADD4
功能: 同時進行src1和src2的每個對應Byte的4次加法,忽略任何進位,即
return[31:24] = src1[31:24] + src2[31:24]
return[23:16] = src1[23:16] + src2[23:16]
return[15: 8] = src1[15: 8] + src2[15: 8]
return[ 7: 0] = src1[ 7: 0] + src2[ 7: 0]
備註: src1,src2的每個8位數據當做signed數據使用
(5) _sadd2()
C代碼: int _sadd2(int src1,int src2)
彙編: SADD2
功能: 同時進行src1,src2的高16位和低16位相加,每個16位結果做飽和處理(溢出時限值,而不是忽略進位).即
return[31:16] = src1[31:16] + src2[31:16]
return[15: 0] = src1[15: 0] + src2[15: 0]
備註: src1,src2的每個16位數據被當做signed數據
(6) _saddus2()
C代碼: int _saddus2(unsigned src1,int src2)
彙編: SADDUS2
功能: 執行和_sadd2一樣的操作,但src1解釋不同,見備註
備註: src1的每個16位數據被當作unsigned數據,src2的每個16位數據被當作signed數據
(7) _saddu4()
C代碼: unsigned _saddu4(unsigned src1,unsigned src2)
彙編: SADDU4
功能: 執行和_add4()一樣的操作,但數據解釋爲unsigned,限值爲0xff
(8) _addsub()
C代碼: long long _addsub(int src1,int src2)
彙編: ADDSUB
功能:同時進行src1 + src2和src1 - src2操作,即
hi32(return) = src1 + src2
low32(return) = src1 - src2
(9) _addsub2()
C代碼: long long _addsub2(int src1,int src2)
彙編: ADDSUB2
功能: 同時進行_add2()和_sub2()操作,即
return[63:48] = hi16(src1) + hi16(src2)
return[47:32] = low16(src1) + low16(src2)
return[31:16] = hi16(src1) - hi16(src2)
return[15:0] = low16(src1) - low16(src2)
(10) _saddsub()
C代碼: long long _saddsub(unsigned src1,unsigned src2)
彙編: SADDSUB
功能: 同時執行add()和sub()操作,即
return[63:32] = src1 + src2
return[31:0] = src1 - src2
(11) _saddsub2()
C代碼: long long _saddsub2(unsigned src1,unsigned src2)
彙編: SADDSUB2
功能: 同時進行sadd2()和ssub2()操作,即
return[63:48] = src1[31:16] + src2[31:16]
return[47:32] = src1[15: 0] + src2[15: 0]
return[31:16] = src1[31:16] - src2[31:16]
return[15: 0] = src1[15: 0] - src2[15: 0]
(12) _ssub2()
C代碼: int _ssub2(unsigned src1,unsigned src2)
彙編: SSUB2
功能: 同時進行高16位和低16位的減法,即
return[31:16] = src1[31:16] - src2[31:16]
return[15: 0] = src1[15: 0] - src2[15: 0]
(13) _mpy2(),_mpy2ll
C代碼: double(long long) _mpy2(int src1,int src2),long long _mpy2ll(int src1,int src2)
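彙編: MPY2
功能: 同時執行兩個有符號16位 * 16位操作,即
return[63:32] = src1[31:16] * src2[31:16]
return[31: 0] = src1[15: 0] * src2[15: 0]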
(14) _mpyhi(),_mpyhill()
C代碼: double _mpyhi(int src1,int src2),long long _mpyhill(int src1,int src2)
彙編: MPYHI
功能: 執行16位 * 32位操作,即
return = src1[31:16] * src2[31: 0]
(15) _mpyli(),_mpylill()
C代碼: double _mpyli(int src1,int src2),long long _mpylill(int src1,int src2)
彙編: MPYLI
功能: 執行16位 * 32位操作,即
return = src1[15: 0] * src2[31: 0]
(16) _mpyhir()
C代碼: int _mpyhir(int src1,int src2)
彙編: MPYHIR
功能: 執行(16位 * 32位 >> 15)操作,即
return = (src1[31:16] * src2[31: 0]) >> 15;
備註: 結果看起來被四捨五入了,例如0x1122 * 0x55667788結果應該是0x0b6e4b17,但仿真結果爲0x0b6e4b18
(16) _mpylir()
C代碼: int _mpylir(int src1,int src2)
彙編: MPYLIR
功能: 執行(16位 * 32位 >> 15)操作,即
return = (src1[15: 0] * src2[31: 0]) >> 15;
備註: 結果看起來被四捨五入了,例如0x1122 * 0x55667788結果應該是0x0b6e4b17,但仿真結果爲0x0b6e4b18
(17) _mpy*u4(),_mpy*u4ll()
C代碼: double _mpysu4(int src1,int src2),long long _mpysu4ll(int src1,int src2)
double _mpyu4(unsigned src1,unsigned src2),long long _mpyu4ll(unsigned src1,unsigned src2)
彙編: MPYSU4
MPYU4.M2X B4,A3,B5:A4
功能: 同時執行4個8位 * 8位操作,即
return[63:48] = src1[31:24] * src2[31:24];
return[47:32] = src1[23:16] * src2[23:16];
return[31:16] = src1[15: 8] * src2[15: 8];
return[15: 0] = src1[ 7: 0] * src2[ 7: 0];
(18) _smpy2(),_smpy2ll()
C代碼: double _smpy2(int src1,int src2),long long _smpy2ll(int src1,int src2)
彙編: SMPY2
功能: 同時執行兩個16位*16位操作,結果再左移1位,即
return = ((src1[31:16] * src2[31:16] << 32) + (src1[15: 0] * src2[15: 0])) << 1;
(19) _mpy32**()
C代碼: int _mpy32(int src1,int src2),long long _mpy32ll(int src1,int src2)
long long _mpy32su(int src1,unsigned src2),long long _mpy32us(unsigned src1,int src2)
long long _mpy32u(unsigned src1,unsigned src2)
彙編: MPY32
MPY32SU.M2X B4,A3,B5:A4
MPY32US.M2X B4,A3,B5:A4
MPY32U.M2X B4,A3,B5:A4
功能: 執行32位 * 32位操作
(20) _mpy2ir()
C代碼: long long _mpy2ir(int src1,int src2)
彙編: MPY2IR
功能: 返回如下結果
return[63:32] = src1[31:16] * src2 >> 15
return[31: 0] = src1[15: 0] * src2 >> 15
備註: 每一部分可能被四捨五入
(21) _gmpy()
C代碼: unsigned _gmpy(unsigned src1,unsigned src2)
彙編: GMPY
功能: 執行"Galois Field multiply"
(22) _smpy**()
C代碼: int _smpy(int src1,int src2),int _smpyh(int src1,int src2)
int _smpyhl(int src1,int src2),int _smpylh(int src1,int src2)
彙編: SMPY SMPYH
SMPYHL SMPYLH
功能: 執行16位*16位操作,結果再左移一位,限值結果爲小於0x80000000
_smpy: return[31: 0] = src1[15: 0] * src2[15: 0] << 1
_smpyh: return[31: 0] = src1[31:16] * src2[31:16] << 1
_smpyhl:return[31: 0] = src1[31:16] * src2[15: 0] << 1
_smpylh:return[31: 0] = src1[15: 0] * src2[31:16] << 1
(23) _mpy**()
C代碼: int _mpy(int src1,int src2),int _mpyus(unsigned src1,int src2)
int _mpysu(int src1,unsigned src2),unsigned _mpyu(unsigned src1,unsigned src2)
彙編: MPY MPYUS
MPYSU MPYU
功能: 返回src1[15: 0] * src2[15: 0]的結果
(24) _mpyh**()
C代碼: int _mpyh(int src1,int src2),int _mpyhus(unsigned src1,int src2)
int _mpyhsu(int src1,unsigned src2),int _mpyhu(unsigned src1,unsigned src2)
彙編: MPYH MPYHUS
MPYHSU MPYHU
功能: 返回src1[31:16] * src2[31:16]的結果
(25) _mpyh*l*()
C代碼: int _mpyhl(int src1,int src2),int _mpyhuls(unsigned src1,int src2)
int _mpyhslu(int src1,unsigned src2),int _mpyhlu(unsigned src1,unsigned src2)
彙編: MPYHL MPYHULS
MPYHSLU MPYHLU
功能: 返回src1[31:16] * src2[15: 0]的結果
(26) _mpyl*h*()
C代碼: int _mpylh(int src1,int src2),int _mpyluhs(unsigned src1,int src2)
int _mpylshu(int src1,unsigned src2),int _mpylhu(unsigned src1,unsigned src2)
彙編: MPYLH MPYLUHS
MPYLSHU MPYLHU
功能: 返回src1[15: 0] * src2[31: 16]的結果
(27) _*ssub()
C代碼: int _ssub(int src1,int src2),long _lssub(int src1,int src2)
彙編: SSUB.L2X B4,A3,B4
功能: 執行src1 - src2操作,符號擴展爲int或long
(28) _subc()
C代碼:unsigned _subc(int src1,int src2)
彙編: SUBC
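功能: 條件減法並左移一位(除法步進,通常循環調用以實現無符號除法),即
if (src1 >= src2) return = ((src1 - src2) << 1) + 1;
else return = src1 << 1;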
(29) _sub2()
C代碼: int _sub2(int src1,int src2)
彙編: SUB2
功能: 同時執行高16位和低16位減法,即
return[31:16] = src1[31:16] - src2[31:16]
return[15: 0] = src1[15: 0] - src2[15: 0]
(30) _sub4()
C代碼: int _sub4(int src1,int src2)
彙編: SUB4
功能: 同時執行4個8位減法,即
return[31:24] = src1[31:24] - src2[31:24]
return[23:16] = src1[23:16] - src2[23:16]
return[15: 8] = src1[15: 8] - src2[15: 8]
return[ 7: 0] = src1[ 7: 0] - src2[ 7: 0]
(31) _subabs4()
C代碼: int _subabs4(int src1,int src2)
彙編: SUBABS4
功能: 同時執行4個8位減法,再求絕對值,即
return[31:24] = |src1[31:24] - src2[31:24]|
return[23:16] = |src1[23:16] - src2[23:16]|
return[15: 8] = |src1[15: 8] - src2[15: 8]|
return[ 7: 0] = |src1[ 7: 0] - src2[ 7: 0]|
(32) _avg2()
C代碼: int _avg2(int src1,int src2)
彙編: AVG2
功能: 計算兩路16位平均值,四捨五入結果
return[31:16] = (src1[31:16] + src2[31:16] + 1) / 2;
return[15: 0] = (src1[15: 0] + src2[15: 0] + 1) / 2;
(33) _avgu4()
C代碼: int _avgu4(int src1,int src2)
彙編: AVGU4
功能: 計算四路8位平均值,四捨五入結果
return[31:24] = (src1[31:24] + src2[31:24] + 1) / 2;
return[23:16] = (src1[23:16] + src2[23:16] + 1) / 2;
return[15: 8] = (src1[15: 8] + src2[15: 8] + 1) / 2;
return[ 7: 0] = (src1[ 7: 0] + src2[ 7: 0] + 1) / 2;
3.位操作指令
(1) _clr()
C代碼: int _clr(unsigned src,unsigned csta,unsigned cstb)
彙編: CLR
功能: 清除src上的位csta ~ 位cstb,即
src[cstb:csta] = 0;
備註: csta必須 <= cstb,且保證 < 32
(2) _clrr()
C代碼: int _clrr(unsigned src,int shift)
彙編: CLR
功能: 清除src上的shift[ 9: 5] ~ shift[ 4: 0]位
(3) _set()
C代碼: int _set(unsigned src,unsigned csta,unsigned cstb)
彙編: SET
功能: 設置src上的位csta ~ 位cstb,即
src[cstb:csta] = '1';
備註: csta必須 <= cstb,且保證 < 32
(4) _setr()
C代碼: int _setr(unsigned src,int shift)
彙編: SET
功能: 設置src上的shift[ 9: 5] ~ shift[ 4: 0]位爲'1'
(5) _sshl()
C代碼: int _sshl(int src,unsigned shift)
彙編: SSHL
功能: return[31: 0] = src << shift;
備註: 帶飽和的左移,移位溢出時結果被限值到32位有符號數範圍
(6) _rotl()
C代碼: int _rotl(unsigned src,unsigned shift)
彙編: ROTL
功能: 循環左移,return[31: 0] = (src << shift) | (src >> (32 - shift));
備註: 從高位移出的位循環補入低位,不做符號擴展
(7) _shlmb(),_shrmb()
C代碼: int _shlmb(int src1,int src2),int _shrmb(int src1,int src2)
彙編: SHLMB SHRMB
功能: shlmb-->return[31:0] = (src2 << 8) | src1[31:24]
shrmb-->return[31:0] = (src2 >> 8) | (src1[7: 0] << 24)
(8) _shr2(),_shru2()
C代碼: int _shr2(int src1,unsigned shift),int _shru2(unsigned src1,unsigned shift)
彙編: SHR2 SHRU2
功能: return[31: 16] = src1[31:16] >> shift
return[15: 0] = src1[15: 0] >> shift
備註: 有符號數操作(_shr2)返回值會進行符號擴展(空出的高位以各自16位的符號位填充);無符號數操作(_shru2)空出的高位補0
(9) _sshvl(),_sshvr()
C代碼: int _sshvl(int src,int shift),int _sshvr(int src,int shift)
彙編: SSHVL SSHVR
功能: sshvl-->return[31: 0] = (src << shift) > MAX_INT?MAX_INT:(src << shift)
sshvr-->return[31: 0] = (src >> shift) < MIN_INT?MIN_INT:(src >> shift)
(10) _shfl()
C代碼: int _shfl(int src)
彙編: SHFL
功能: 低16位嵌入到偶位,高16位嵌入到奇位,即
return[31:0] = src[31]src[15]src[30]src[14]........src[16]src[0]
(11) _ext()
C代碼: int _ext(int src,unsigned lshift,unsigned rshift)
彙編: EXT
功能: return[31: 0] = (src << lshift) >> rshift;
(12) _extr()
C代碼: int _extr(int src,int shift)
彙編: EXT
功能: return[31: 0] = (src << shift[ 9: 5]) >> shift[4: 0];
(13) _extu()
C代碼: int _extu(int src,unsigned lshift,unsigned rshift)
彙編: EXTU
功能: return[31: 0] = (src << lshift) >> rshift; (無符號操作,右移時高位補0)
(14) _extur()
C代碼: int _extur(int src,int shift)
彙編: EXTU
功能: return[31: 0] = (src << shift[ 9: 5]) >> shift[4: 0]; (無符號操作,右移時高位補0)
(15) _lmbd()
C代碼: unsigned _lmbd(int zero_or_one,int src)
彙編: LMBD
功能: 從左到右查找該位是zero_or_one的位,返回該位置
備註:
zero_or_one必須爲0或者1,爲其他值無LMBD指令編譯
如src = 0x0fff0000,則
_lmbd(0,src) == 0 /*D31爲'0',所以返回0*/
_lmbd(1,src) == 4 /*D27爲'1',所以返回4*/
(16) _*norm()
C代碼: unsigned _norm(int src),unsigned _lnorm(long src)
彙編: NORM B4,B4
功能: 返回src的冗餘符號位個數(從符號位的下一位起,與符號位相同的連續位數),常用於數據歸一化
(17) _bitc4()
C代碼: unsigned _bitc4(unsigned src)
彙編: BITC4
功能: 統計每個字節的'1'總數,4個總數合成unsigned返回
備註:
例如src = 0x01030507,因爲4個字節0x01,0x03,0x05,0x07分別有1,2,2,3個'1',所以返回爲0x01020203
(18) _bitr()
C代碼: unsigned _bitr(unsigned src)
彙編: BITR
功能: 反轉所有的位,即return[31:0] = src[ 0:31]
備註:
例如src = '00010001000100010001000100010001',則返回值是'10001000100010001000100010001000'
(19) _deal()
C代碼: unsigned _deal(unsigned src)
彙編: DEAL
功能: 所有偶位組合成一個16位數據,所有奇位組合成一個16位數據,返回該32位值,即
return[31:16] = src[31,29,27,....,1]
return[15: 0] = src[30,28,26,....,0]
4.內存操作指令
(1) _amem*()
C代碼: ushort& _amem2(void* ptr),const ushort& _amem2_const(void* ptr)
unsigned& _amem4(void* ptr),const unsigned& _amem4_const(void* ptr)
long long& _amem8(void* ptr),const long long& _amem8_const(void* ptr)
double & _amemd8(void* ptr),const double& _amemd8_const(void* ptr)
彙編: 略
功能: 從對齊地址中讀/寫n字節數據,n = 以上的數字
備註:
讀--->
double val;
char test[8] = {0,1,2,3,4,5,6,7};
val = _amem2_const(&test) + _amem4_const(&test) + _amem8_const(&test);
寫--->
_amem2(&test) = 0x0011;
_amem4(&test) = 0x00112233;
_amem8(&test) = 0x0011223344556677;
(2) _mem*()
C代碼: ushort& _mem2(void* ptr),const ushort& _mem2_const(void* ptr)
unsigned& _mem4(void* ptr),const unsigned& _mem4_const(void* ptr)
long long& _mem8(void* ptr),const long long& _mem8_const(void* ptr)
double & _memd8(void* ptr),const double& _memd8_const(void* ptr)
彙編: 略
功能: 從非對齊地址中讀/寫n字節數據,n = 以上的數字
備註:
讀--->
double val;
char test[8] = {0,1,2,3,4,5,6,7};
val = _mem2_const(&test) + _mem4_const(&test) + _mem8_const(&test);
寫--->
_mem2(&test) = 0x0011;
_mem4(&test) = 0x00112233;
_mem8(&test) = 0x0011223344556677;
(3) _mvd()
C代碼: int _mvd(int src)
彙編: MVD
功能: 利用4週期乘法流水線拷貝數據,return[31: 0] = src[31: 0]
備註: 這個需要和_mpy**()配合實現並行工作
5.數據包裝/轉換指令
(1) _hi**()
C代碼: unsigned _hi(double src),unsigned _hill(long long src)
彙編: 無
功能: 返回64位數據的高32位數據
(2) _lo**()
C代碼: unsigned _lo(double src),unsigned _loll(long long src)
彙編: 無
功能: 返回64位數據的低32位數據
(3) _*to*()
C代碼: ulong _dtol(double src),unsigned _ftoi(float src)
double _itod(unsigned hi32,unsigned low32),float _itof(unsigned src)
long long _itoll(unsigned hi32,unsigned low32),double _ltod(long src)
彙編: 無
功能: 各種數據類型互相轉換
(4) _sat()
C代碼: int _sat(long src2)
彙編: SAT
功能: 把40位long數據轉成32位數據
(5) _pack*2()
C代碼: unsigned _pack2(unsigned src1,unsigned src2),
unsigned _packh2(unsigned src1,unsigned src2)
彙編: PACK2 PACKH2
功能: _pack2--->return[31:16] = src1[15: 0],return[15: 0] = src2[15: 0]
_packh2-->return[31:16] = src1[31: 16],return[15: 0] = src2[31: 16]
(6) _pack*4()
C代碼: unsigned _packh4(unsigned src1,unsigned src2),
unsigned _packl4(unsigned src1,unsigned src2)
彙編: PACKH4 PACKL4
功能: 返回交替的4字節數據
備註: 如src1 = 0x11223344,src2 = 0x55667788,則
_packh4(src1,src2)返回0x11335577
_packl4(src1,src2)返回0x22446688
(7) _pack**2()
C代碼: unsigned _packhl2(unsigned src1,unsigned src2),
unsigned _packlh2(unsigned src1,unsigned src2)
彙編: PACKHL2 PACKLH2
功能: _packhl2--->return[31:16] = src1[31: 16],return[15: 0] = src2[15: 0]
_packlh2-->return[31:16] = src1[15: 0],return[15: 0] = src2[31: 16]
(8) _spack2()
C代碼: int _spack2(int src1,int src2)
彙編: SPACK2
功能: 把兩個32位數據格式化成16位數據,然後組合成32位數據
備註: return[31: 16] = (int16_t)src1
return[15: 0] = (int16_t)src2
(9) _spacku4()
C代碼: unsigned _spacku4(int src1,int src2)
彙編: SPACKU4
功能: 把4個16位數據格式化成4個8位數據,形成32位數據返回
備註: return[31:24] = (uint8_t)src1[31:16]
return[23:16] = (uint8_t)src1[15: 0]
return[15: 8] = (uint8_t)src2[31:16]
return[ 7: 0] = (uint8_t)src2[15: 0]
(10) _swap4()
C代碼: unsigned _swap4(unsigned src)
彙編: SWAP4
功能: 大小端數據轉換
備註: return[31:24] 和 return[23:16] 交換
return[15: 8] 和 return[ 7: 0] 交換
(11) _unpkhu4()
C代碼: unsigned _unpkhu4(unsigned src)
彙編: UNPKHU4
功能: 把兩個高8位數據轉成兩個16位數據
備註:
return[31:16] = (uint16_t)src[31:24]
return[15: 0] = (uint16_t)src[23:16]
(12) _unpklu4()
C代碼: unsigned _unpklu4(unsigned src)
彙編: UNPKLU4
功能: 把兩個低8位數據轉成兩個16位數據
備註:
return[31:16] = (uint16_t)src[15: 8]
return[15: 0] = (uint16_t)src[ 7: 0]
6.比較/雜項指令
(1) _cmpeq*() _cmpgt*()
C代碼: int _cmpeq2(int src1,int src2),int _cmpeq4(int src1,int src2)
int _cmpgt2(int src1,int src2),int _cmpgtu4(unsigned src1,unsigned src2)
彙編: CMPEQ2 CMPEQ4
CMPGT2 CMPGTU4
功能: 同時比較兩個16位數據或者4個8位數據,比較結果在返回值的低2位或低四位中
備註:
_cmpeq2(0x11223344,0x11220000)返回爲0x02
_cmpeq4(0x11223344,0x00223344)返回爲0x07
_cmpgt2(0x00001111,0x0000ffff)返回爲0x01
_cmpgtu4(0x0000ffff,0x0000aaaa)返回0x03
(2) _xpnd*()
C代碼: int _xpnd2(int src),int _xpnd4(int src)
彙編: XPND2 XPND4
功能: _xpnd2()把src的低2位邏輯值擴展爲2個16位邏輯值
_xpnd4()把src的低4位邏輯值擴展爲4個8位邏輯值
備註:
_xpnd*()一般和_cmp*()配合實現邏輯擴展
_xpnd2(0x01) = 0x0000ffff
_xpnd2(0x03) = 0xffffffff
_xpnd2(0x00) = 0x00000000
_xpnd4(0x00) = 0x00000000
_xpnd4(0x08) = 0xff000000
_xpnd4(0x07) = 0x00ffffff
_xpnd4(0x01) = 0x000000ff