我正在尝试在 Seeed Xiao ESP32-C3 上实现Ivan Kostoski 的基于 ESP32 的声级计的代码。
根据 Seeed 的 wiki,该芯片“是一个 32 位 RISC-V CPU,其中包括一个用于 32 位单精度算术的 FPU(浮点单元)”。因此,基于此我编写了一些用于浮点运算的 RISC-V 汇编代码。
但是,当我尝试编译时,每条浮点指令都会出现 unrecognized opcode 错误。其他指令不会引发错误。
这是 Arduino IDE 控制台输出。请注意第一条指令(addi)不会引发错误。
C:\...\cc4DgX57.s: Assembler messages:
C:\...\cc4DgX57.s:12: Error: unrecognized opcode `flw f0,0(a5)'
C:\...\cc4DgX57.s:13: Error: unrecognized opcode `flw f1,4(a5)'
C:\...\cc4DgX57.s:14: Error: unrecognized opcode `flw f2,8(a5)'
C:\...\cc4DgX57.s:15: Error: unrecognized opcode `flw f3,12(a5)'
C:\...\cc4DgX57.s:16: Error: unrecognized opcode `flw f4,0(a6)'
C:\...\cc4DgX57.s:17: Error: unrecognized opcode `flw f5,4(a6)'
C:\...\cc4DgX57.s:22: Error: unrecognized opcode `flw f6,a2'
C:\...\cc4DgX57.s:24: Error: unrecognized opcode `fmadd.s f6,f2,f4'
C:\...\cc4DgX57.s:25: Error: unrecognized opcode `fmadd.s f6,f3,f5'
C:\...\cc4DgX57.s:26: Error: unrecognized opcode `fmv.s f7,f6'
C:\...\cc4DgX57.s:27: Error: unrecognized opcode `fmadd.s f7,f0,f4'
C:\...\cc4DgX57.s:28: Error: unrecognized opcode `fmadd.s f7,f1,f5'
C:\...\cc4DgX57.s:29: Error: unrecognized opcode `fsw f7,a3'
C:\...\cc4DgX57.s:31: Error: unrecognized opcode `fmv.s f5,f4'
C:\...\cc4DgX57.s:32: Error: unrecognized opcode `fmv.s f4,f6'
C:\...\cc4DgX57.s:34: Error: backward ref to unknown label "1:"
C:\...\cc4DgX57.s:37: Error: unrecognized opcode `fsw f4,0(a6)'
C:\...\cc4DgX57.s:38: Error: unrecognized opcode `fsw f5,4(a6)'
C:\...\cc4DgX57.s:39: Error: unrecognized opcode `fmvi a2,0'
C:\...\cc4DgX57.s:48: Error: unrecognized opcode `flw f0,0(a5)'
C:\...\cc4DgX57.s:49: Error: unrecognized opcode `flw f1,4(a5)'
C:\...\cc4DgX57.s:50: Error: unrecognized opcode `flw f2,8(a5)'
C:\...\cc4DgX57.s:51: Error: unrecognized opcode `flw f3,12(a5)'
C:\...\cc4DgX57.s:52: Error: unrecognized opcode `flw f4,0(a6)'
C:\...\cc4DgX57.s:53: Error: unrecognized opcode `flw f5,4(a6)'
C:\...\cc4DgX57.s:54: Error: unrecognized opcode `fmv.s f6,a7'
C:\...\cc4DgX57.s:55: Error: unrecognized opcode `fli f10,0.0'
C:\...\cc4DgX57.s:56: Error: symbol `loop' is already defined
C:\...\cc4DgX57.s:59: Error: symbol `i' is already defined
C:\...\cc4DgX57.s:60: Error: unrecognized opcode `flw f7,a2'
C:\...\cc4DgX57.s:62: Error: unrecognized opcode `fmadd.s f7,f2,f4'
C:\...\cc4DgX57.s:63: Error: unrecognized opcode `fmadd.s f7,f3,f5'
C:\...\cc4DgX57.s:64: Error: unrecognized opcode `fmv.s f8,f7'
C:\...\cc4DgX57.s:65: Error: unrecognized opcode `fmadd.s f8,f0,f4'
C:\...\cc4DgX57.s:66: Error: unrecognized opcode `fmadd.s f8,f1,f5'
C:\...\cc4DgX57.s:67: Error: unrecognized opcode `fmul.s f9,f8,f6'
C:\...\cc4DgX57.s:68: Error: unrecognized opcode `fsw f9,a3'
C:\...\cc4DgX57.s:70: Error: unrecognized opcode `fmv.s f5,f4'
C:\...\cc4DgX57.s:71: Error: unrecognized opcode `fmv.s f4,f7'
C:\...\cc4DgX57.s:72: Error: unrecognized opcode `fmadd.s f10,f9,f9'
C:\...\cc4DgX57.s:74: Error: backward ref to unknown label "1:"
C:\...\cc4DgX57.s:76: Error: symbol `exit' is already defined
C:\...\cc4DgX57.s:77: Error: unrecognized opcode `fsw f4,a6,0'
C:\...\cc4DgX57.s:78: Error: unrecognized opcode `fsw f5,a6,4'
C:\...\cc4DgX57.s:79: Error: unrecognized opcode `fmr a2,f10'
C:\...\cc4DgX57.s: Error: local label `"1" (instance number 1 of a fb label)' is not defined
这是用 Xtensa ISA 编写的参考代码,我正在尝试将其移植到 RISC-V:
// C-linkage declaration for the hand-written assembly routine defined below.
extern "C" {
int sos_filter_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w);
}
__asm__ (
//
// ESP32 (Xtensa) implementation of an IIR Second-Order Section filter.
// Assumes the a0 and b0 coefficients are one (1.0), so only b1, b2, a1, a2
// are read from coeffs.
//
// Register roles on entry, as used by the code below:
// float* a2 = input;
// float* a3 = output;
// int a4 = len;
// float* a5 = coeffs;
// float* a6 = w;
// (a7 is not referenced by this routine; its prototype has no gain argument.)
//
// Per sample the routine computes
//   f6  = x + a1*w0 + a2*w1      (new delay value)
//   out = f6 + b1*w0 + b2*w1
// and then shifts the delay line (w1 = w0, w0 = f6).
// The two delay values are written back to w after the loop; returns 0.
//
".text \n"
".align 4 \n"
".global sos_filter_f32 \n"
".type sos_filter_f32,@function\n"
"sos_filter_f32: \n"
" entry a1, 16 \n"
// Load the four coefficients and the two delay-line values once, up front.
" lsi f0, a5, 0 \n" // float f0 = coeffs.b1;
" lsi f1, a5, 4 \n" // float f1 = coeffs.b2;
" lsi f2, a5, 8 \n" // float f2 = coeffs.a1;
" lsi f3, a5, 12 \n" // float f3 = coeffs.a2;
" lsi f4, a6, 0 \n" // float f4 = w[0];
" lsi f5, a6, 4 \n" // float f5 = w[1];
" loopnez a4, 1f \n" // for (; len>0; len--) {   (body ends at label 1)
" lsip f6, a2, 4 \n" // float f6 = *input++;
" madd.s f6, f2, f4 \n" // f6 += f2 * f4; // coeffs.a1 * w0
" madd.s f6, f3, f5 \n" // f6 += f3 * f5; // coeffs.a2 * w1
" mov.s f7, f6 \n" // f7 = f6; // b0 assumed 1.0
" madd.s f7, f0, f4 \n" // f7 += f0 * f4; // coeffs.b1 * w0
" madd.s f7, f1, f5 \n" // f7 += f1 * f5; // coeffs.b2 * w1 -> result
" ssip f7, a3, 4 \n" // *output++ = f7;
" mov.s f5, f4 \n" // f5 = f4; // w1 = w0
" mov.s f4, f6 \n" // f4 = f6; // w0 = f6
" 1: \n" // }
// Persist the delay line so the next call continues where this one stopped.
" ssi f4, a6, 0 \n" // w[0] = f4;
" ssi f5, a6, 4 \n" // w[1] = f5;
" movi.n a2, 0 \n" // return 0;
" retw.n \n"
);
// C-linkage declaration for the hand-written assembly routine defined below.
extern "C" {
float sos_filter_sum_sqr_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w, float gain);
}
__asm__ (
//
// ESP32 (Xtensa) implementation of an IIR Second-Order Section filter with
// applied gain.
// Assumes the a0 and b0 coefficients are one (1.0).
// Returns the sum of squares of the filtered (gain-scaled) samples.
//
// Register roles on entry, as used by the code below:
// float* a2 = input;
// float* a3 = output;
// int a4 = len;
// float* a5 = coeffs;
// float* a6 = w;
// float a7 = gain;
//
// Per sample the routine computes
//   f7  = x + a1*w0 + a2*w1      (new delay value)
//   out = (f7 + b1*w0 + b2*w1) * gain
//   sum_sqr += out * out
// and then shifts the delay line (w1 = w0, w0 = f7).
//
".text \n"
".align 4 \n"
".global sos_filter_sum_sqr_f32 \n"
".type sos_filter_sum_sqr_f32,@function \n"
"sos_filter_sum_sqr_f32: \n"
" entry a1, 16 \n"
// Load the four coefficients and the two delay-line values once, up front.
" lsi f0, a5, 0 \n" // float f0 = coeffs.b1;
" lsi f1, a5, 4 \n" // float f1 = coeffs.b2;
" lsi f2, a5, 8 \n" // float f2 = coeffs.a1;
" lsi f3, a5, 12 \n" // float f3 = coeffs.a2;
" lsi f4, a6, 0 \n" // float f4 = w[0];
" lsi f5, a6, 4 \n" // float f5 = w[1];
" wfr f6, a7 \n" // float f6 = gain;   (moved from integer register a7)
" const.s f10, 0 \n" // float sum_sqr = 0;
" loopnez a4, 1f \n" // for (; len>0; len--) {   (body ends at label 1)
" lsip f7, a2, 4 \n" // float f7 = *input++;
" madd.s f7, f2, f4 \n" // f7 += f2 * f4; // coeffs.a1 * w0
" madd.s f7, f3, f5 \n" // f7 += f3 * f5; // coeffs.a2 * w1;
" mov.s f8, f7 \n" // f8 = f7; // b0 assumed 1.0
" madd.s f8, f0, f4 \n" // f8 += f0 * f4; // coeffs.b1 * w0;
" madd.s f8, f1, f5 \n" // f8 += f1 * f5; // coeffs.b2 * w1;
" mul.s f9, f8, f6 \n" // f9 = f8 * f6; // f8 * gain -> result
" ssip f9, a3, 4 \n" // *output++ = f9;
" mov.s f5, f4 \n" // f5 = f4; // w1 = w0
" mov.s f4, f7 \n" // f4 = f7; // w0 = f7;
" madd.s f10, f9, f9 \n" // f10 += f9 * f9; // sum_sqr += f9 * f9;
" 1: \n" // }
// Persist the delay line so the next call continues where this one stopped.
" ssi f4, a6, 0 \n" // w[0] = f4;
" ssi f5, a6, 4 \n" // w[1] = f5;
" rfr a2, f10 \n" // return sum_sqr;   (moved back into integer register a2)
" retw.n \n" // windowed return
);
这是我的 RISC-V 代码:
// C-linkage declaration for the assembly routine defined below.
extern "C" {
int sos_filter_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w);
}
__asm__ (
//
// Attempted RISC-V port of the IIR Second-Order Section filter.
// Assumes a0 and b0 coefficients are one (1.0)
//
// Register roles as used by the code below:
// float* a2 = input;
// float* a3 = output;
// int a4 = len;
// float* a5 = coeffs;
// float* a6 = w;
// (a7 is not referenced by this routine; its prototype has no gain argument.)
//
// NOTE(review): this a2..a6 mapping is the one the Xtensa version uses. The
// standard RISC-V calling convention is expected to start argument passing at
// a0, not a2 -- confirm against the RISC-V psABI.
// NOTE(review): this block does not assemble as written. Every f* instruction
// is reported as "unrecognized opcode" by the target assembler, and several
// lines are additionally malformed (see the per-line notes).
//
".text \n"
".p2align 2 \n"
".globl sos_filter_f32 \n"
".type sos_filter_f32,@function\n"
"sos_filter_f32: \n"
" addi sp, sp, -16 \n" // lowers sp by 16; nothing below raises it again before ret
" flw f0, 0(a5) \n" // float f0 = coeffs.b1;
" flw f1, 4(a5) \n" // float f1 = coeffs.b2;
" flw f2, 8(a5) \n" // float f2 = coeffs.a1;
" flw f3, 12(a5) \n" // float f3 = coeffs.a2;
" flw f4, 0(a6) \n" // float f4 = w[0];
" flw f5, 4(a6) \n" // float f5 = w[1];
// NOTE(review): `loop`, `i` and `exit` are ordinary (non-local) labels; the same
// names are defined again in the second asm block of this file.
" loop: \n"
" bnez a4, 1f \n" // for (; len>0; len--) {   -- targets local label `1`, but the label below is named `i`
" j exit \n" // len == 0: skip the loop body
" i: \n"
" flw f6, a2 \n" // float f6 = *input++;   -- no offset(base) form here, unlike the loads above
" addi a2, a2, 4 \n" // post-increment by 4
// NOTE(review): the fmadd.s lines keep the Xtensa three-operand accumulate
// shape; RISC-V fmadd.s is expected to take rd, rs1, rs2, rs3 -- verify.
" fmadd.s f6, f2, f4 \n" // f6 += f2 * f4; // coeffs.a1 * w0
" fmadd.s f6, f3, f5 \n" // f6 += f3 * f5; // coeffs.a2 * w1
" fmv.s f7, f6 \n" // f7 = f6; // b0 assumed 1.0
" fmadd.s f7, f0, f4 \n" // f7 += f0 * f4; // coeffs.b1 * w0
" fmadd.s f7, f1, f5 \n" // f7 += f1 * f5; // coeffs.b2 * w1 -> result
" fsw f7, a3 \n" // *output++ = f7;   -- no offset(base) form here either
" addi a3, a3, 4 \n" // post-increment by 4
" fmv.s f5, f4 \n" // f5 = f4; // w1 = w0
" fmv.s f4, f6 \n" // f4 = f6; // w0 = f6
" addi a4, a4, -1 \n" // update loop counter
" bnez a4, 1b \n" // intended back-edge; `1b` has no matching `1:` label
" j exit \n" // jumps to the label on the very next line
" exit: \n" // }
" fsw f4, 0(a6) \n" // w[0] = f4;
" fsw f5, 4(a6) \n" // w[1] = f5;
" fmvi a2, 0 \n" // return 0;   -- NOTE(review): `fmvi` does not look like a RISC-V mnemonic; verify
" ret \n"
);
// C-linkage declaration for the assembly routine defined below.
extern "C" {
float sos_filter_sum_sqr_f32(float *input, float *output, int len, const SOS_Coefficients &coeffs, SOS_Delay_State &w, float gain);
}
__asm__ (
//
// Attempted RISC-V port of the IIR Second-Order section filter with applied gain.
// Assumes a0 and b0 coefficients are one (1.0)
// Intended to return the sum of squares of the filtered samples.
//
// Register roles as used by the code below:
// float* a2 = input;
// float* a3 = output;
// int a4 = len;
// float* a5 = coeffs;
// float* a6 = w;
// float a7 = gain;
//
// NOTE(review): this a2..a7 mapping is the one the Xtensa version uses. The
// standard RISC-V calling convention is expected to start argument passing at
// a0 and to use separate registers for float arguments -- confirm against the
// RISC-V psABI.
// NOTE(review): this block does not assemble as written. Every f* instruction
// is reported as "unrecognized opcode" by the target assembler, and several
// lines are additionally malformed (see the per-line notes).
//
".text \n"
".p2align 2 \n"
".globl sos_filter_sum_sqr_f32 \n"
".type sos_filter_sum_sqr_f32,@function \n"
"sos_filter_sum_sqr_f32: \n"
" addi sp, sp, -16 \n" // lowers sp by 16; nothing below raises it again before ret
" flw f0, 0(a5) \n" // float f0 = coeffs.b1;
" flw f1, 4(a5) \n" // float f1 = coeffs.b2;
" flw f2, 8(a5) \n" // float f2 = coeffs.a1;
" flw f3, 12(a5) \n" // float f3 = coeffs.a2;
" flw f4, 0(a6) \n" // float f4 = w[0];
" flw f5, 4(a6) \n" // float f5 = w[1];
" fmv.s f6, a7 \n" // float f6 = gain;   -- source is an integer register; fmv.s is used FP-to-FP elsewhere in this file
" fli f10, 0.0 \n" // float sum_sqr = 0;   -- NOTE(review): `fli` availability on this toolchain is unconfirmed
// NOTE(review): `loop`, `i` and `exit` are ordinary (non-local) labels that the
// first asm block of this file also defines; the assembler reports them as
// already defined.
" loop: \n"
" bnez a4, 1f \n" // for (; len>0; len--) {   -- targets local label `1`, but the label below is named `i`
" j exit \n" // len == 0: skip the loop body
" i: \n"
" flw f7, a2 \n" // float f7 = *input++;   -- no offset(base) form here, unlike the loads above
" addi a2, a2, 4 \n" // post-increment by 4
// NOTE(review): the fmadd.s lines keep the Xtensa three-operand accumulate
// shape; RISC-V fmadd.s is expected to take rd, rs1, rs2, rs3 -- verify.
" fmadd.s f7, f2, f4 \n" // f7 += f2 * f4; // coeffs.a1 * w0
" fmadd.s f7, f3, f5 \n" // f7 += f3 * f5; // coeffs.a2 * w1;
" fmv.s f8, f7 \n" // f8 = f7; // b0 assumed 1.0
" fmadd.s f8, f0, f4 \n" // f8 += f0 * f4; // coeffs.b1 * w0;
" fmadd.s f8, f1, f5 \n" // f8 += f1 * f5; // coeffs.b2 * w1;
" fmul.s f9, f8, f6 \n" // f9 = f8 * f6; // f8 * gain -> result
" fsw f9, a3 \n" // *output++ = f9;   -- no offset(base) form here either
" addi a3, a3, 4 \n" // post-increment by 4
" fmv.s f5, f4 \n" // f5 = f4; // w1 = w0
" fmv.s f4, f7 \n" // f4 = f7; // w0 = f7;
" fmadd.s f10, f9, f9 \n" // f10 += f9 * f9; // sum_sqr += f9 * f9;
" addi a4, a4, -1 \n" // update loop counter
" bnez a4, 1b \n" // intended back-edge; `1b` has no matching `1:` label
" j exit \n" // jumps to the label on the very next line
" exit: \n" // }
" fsw f4, a6, 0 \n" // w[0] = f4;   -- Xtensa-style operand order; the first routine writes this as `fsw f4, 0(a6)`
" fsw f5, a6, 4 \n" // w[1] = f5;   -- same operand-order issue
" fmr a2, f10 \n" // return sum_sqr;   -- NOTE(review): `fmr` does not look like a RISC-V mnemonic; verify
" ret \n" // return to caller
);
我正在使用单精度指令,根据制造商的说法,该芯片应该支持这些指令。不过,unrecognized opcode 错误表明情况并非如此。
我已在 Arduino IDE 中正确安装了该开发板,并且在编译之前选择了它,因此代码并不是为某个不相关的、没有浮点支持的 RISC-V 芯片编译的。
我的代码的其余部分不是问题,因为我已将上述函数移植到 C++(遵循 Ivan 对汇编代码的注释),并且它们确实可以编译和工作。
Seeed Studio 不是制造商。他们销售使用 ESP32-C3(以及其他 CPU)的开发板,但并不制造 ESP32-C3;制造商是乐鑫(Espressif)。关键是查看乐鑫的 CPU 技术参考手册,而不是 Seeed Studio 的销售信息。
数据表很少指出不支持的内容,因此技术参考从未明确指出“不支持浮点扩展”。RISC V CPU有很多版本;乐鑫将 C3 中使用的 CPU 描述为支持“基本整数 (I)、乘法/除法 (M) 和压缩 (C) 标准扩展”(1.1 ESP RISC V CPU 概述)。
文档的后续部分表明,单精度、双精度和四精度浮点扩展均不可用(见机器 ISA 寄存器 misa 中的标志位)。您尝试使用的指令(fmadd.s、fmv.s 等)都属于 misa 寄存器指示不支持的浮点计算指令,这就是您编写的代码无法构建的原因。
将 ESP32-C3 与其他 ESP32 CPU 进行比较,您可以将其视为该系列的“价值处理器”。乐鑫当前的 RISC-V CPU 内存较少,为单核,不支持 PSRAM,并且不具备单精度、双精度或四精度的 RISC-V 浮点计算扩展。乐鑫的其他 CPU(最初的 ESP32、ESP32-S2 和 ESP32-S3)在不同方面都具有更强的功能,而且成本也更高。
我对您的替代方案的最佳建议是按照@Peter Cordes 在评论中建议的方式进行操作;检查编译器的汇编代码输出,并根据您在其中找到的内容编写代码。