我有一个SPARC汇编文件,它会在SPARC T1的RTL代码上运行(先由汇编器将其转换为内存映像)。我需要知道RTL实际执行了多少条指令,而靠手工在文件里逐条统计指令数是不可行的。如果有人能告诉我该怎么做,我将非常感激……
由于文件很大,我附加了Dropbox链接: https://dl.dropboxusercontent.com/u/48635184/exu_muldiv.s
但这里是代码的前几行:
/***********************************************************************
* Name: exu_muldiv.s
* Date: April 23, 2003
*
* Description: Execute Mulx, Mulscc, Smul, Smulcc, Umul, Umulcc,
* Sdiv, Sdivcc, Sdivx, Udiv, Udivcc, Udivx with interesting data
* patterns in multiple threads. Also play with delay slots and
* intermixed mul and div operations.
*
* This test depends on SAS for result checking.
*
**********************************************************************/
/*
 * Trip counts for the operand-table loops in this test. In the visible part
 * of the file the first one bounds mul_loop1 and mulscc_1 and the second one
 * bounds divx_loop1; the third is not referenced in the visible part.
 */
! be sure to update when adding cases...
#define NUM_MUL_CASES 6
#define NUM_DIV_CASES 4
#define NUM_IMMED_CASES 5
/*
 * Division-by-zero trap handling. Neither macro below is referenced in the
 * visible part of this file; presumably boot.s (included after these
 * definitions) uses them to install a custom handler -- TODO confirm.
 *
 * The handler body reads TSTATE, TT, TPC and TNPC into %i1/%i0. Three of
 * the four reads target %i1, so only the last value written there survives.
 * It then issues DONE, which per SPARC V9 resumes at the saved TNPC, so the
 * trapping instruction is not re-executed.
 */
! for divide by zero cases
#define H_T0_Division_By_Zero
#define My_T0_Division_By_Zero \
rdpr %tstate, %i1; \
rdpr %tt, %i1; \
rdpr %tpc, %i0; \
rdpr %tnpc, %i1; \
done; \
nop;
/*******************************************************/
#include "boot.s"
/*
 * Test entry point. th_fork is a macro defined outside this file;
 * presumably it dispatches each running thread to the matching
 * th_main_<n> label below -- TODO confirm against its definition.
 *
 * Each th_main_<n> stub does the same three things:
 *   - SETX loads the address of that thread's multiply operand table into
 *     %g1, using %g7 as the scratch register;
 *   - BA jumps to the shared code at all_threads1;
 *   - NOP fills the branch delay slot.
 */
.global main
main:
th_fork(th_main,%l0) ! start up to four threads.
! All threads do the same thing, but with different data patterns.
! No need to run more than one core.
th_main_0:
setx mul_data_t0,%g7,%g1
ba all_threads1
nop
th_main_1:
setx mul_data_t1,%g7,%g1
ba all_threads1
nop
th_main_2:
setx mul_data_t2,%g7,%g1
ba all_threads1
nop
th_main_3:
setx mul_data_t3,%g7,%g1
ba all_threads1 ! target is the very next label; keeps all four stubs uniform
nop
all_threads1:
/*
 * Register-operand multiply pass, shared by all threads. On entry %g1 holds
 * the address of the multiply operand table chosen by the th_main_<n> stub.
 *   %g1 = pointer to the current operand pair (advanced 16 bytes per pass)
 *   %g2 = number of operand pairs still to process
 *   %g3 = copy of the table base, reused by the MULScc section further down
 *   %l1, %l2 = the two 64-bit operands of the current pair
 * The products land in %l4-%l7 and are not compared here; the file header
 * says result checking is left to SAS.
 */
!*************************************************************
! Operand2 as a register: MULX, UMUL, SMUL, UMULcc, SMULcc
!*************************************************************
add %g0,NUM_MUL_CASES,%g2
mova %icc,%g1,%g3 ! keep the multiply operand address handy
mul_loop1:
ldx [%g1],%l1 ! first operand of the pair
ldx [%g1+8],%l2 ! second operand of the pair
wr %g0,%g0,%ccr ! ccr clear
mulx %l1,%l2,%l4
umul %l1,%l2,%l5
rd %y,%i1 ! be sure SAS looks at Y-reg
smul %l1,%l2,%l6
rd %y,%i1
! cc-setting multiplies, first starting from a cleared CCR ...
wr %g0,%g0,%ccr ! clear ccr
umulcc %l1,%l2,%l7
rd %y,%i1
wr %g0,%g0,%ccr
smulcc %l1,%l2,%l6
rd %y,%i1
! ... then repeated starting from CCR = 0xff
wr %g0,0xff,%ccr ! set ccr. Should not matter.
umulcc %l1,%l2,%l7
rd %y,%i1
wr %g0,0xff,%ccr
smulcc %l1,%l2,%l6
rd %y,%i1
sub %g2,0x1,%g2 ! one pair done
brnz,pt %g2,mul_loop1 ! loop while pairs remain
! delay slot (not annulled): also executes on the final, untaken pass
add %g1,0x10,%g1 ! move operand pointer
/*
 * MULScc self-check. This section is assembled unless MULSCC_BUGS is
 * defined. It walks the same operand table as mul_loop1 (base restored
 * from %g3) and, for each pair:
 *   1. writes the first operand into the Y register;
 *   2. truncates both operands to 32 bits and saves their MULX product
 *      in %l4 as the reference value;
 *   3. runs the mulscc_2 step loop, accumulating into %l1;
 *   4. combines %l1 (shifted left by 33) with Y and traps with T_BAD_TRAP
 *      if the result differs from the reference.
 * T_BAD_TRAP is defined outside this file.
 */
!**********************************
! Operand2 as a register: MULScc
!**********************************
#ifndef MULSCC_BUGS
mova %icc,%g3,%g1 ! same ops as mul_loop1
add %g0,NUM_MUL_CASES,%g2
mulscc_1:
wr %g0,%g0,%ccr ! ccr clear
ldx [%g1],%l1
ldx [%g1+8],%l2
wr %l1,0,%y ! lower bits of multiplier into Y-reg
rd %y,%l0 ! for sas debug
srl %l1,0,%l1 ! clear rs1 upper
srl %l2,0,%l2 ! clear rs2 upper
mulx %l1,%l2,%l4 ! save for later compare
add %g0,0,%l1 ! clear rs1 (product upper) completely
add %g0,32,%l3 ! bit position counter
mulscc_2:
! %l3 runs 31, 30, ..., 0, -1 and the branch is taken while %l3 >= 0,
! so this loop body executes 33 times per operand pair.
sub %l3,1,%l3
mulscc %l1,%l2,%l1
brgez,pt %l3,mulscc_2
nop ! branch delay slot
sllx %l1,33,%l1 ! product upper
rd %y,%l3 ! product lower
or %l1,%l3,%l3 ! full product should be equal to mulx
subcc %l3,%l4,%l5 ! zero (Z set) when the two products match
tnz T_BAD_TRAP ! mismatch: take the failure trap
nop
sub %g2,0x1,%g2 ! one pair done
brnz,pt %g2,mulscc_1 ! loop while pairs remain
! delay slot (not annulled): also executes on the final, untaken pass
add %g1,0x10,%g1 ! move operand pointer
#endif
/*
 * Select the divide operand table for the running thread.
 * rdth_id is a macro defined outside this file; per the existing comment
 * it leaves the thread id in %o1.
 *
 * The compare chain places the CMP for the next test in the delay slot of
 * the previous BE, so that CMP executes whether or not the branch is taken.
 * Thread ids 0, 1 and 2 get their own table; every other id falls through
 * to th_divx_3.
 *
 * Each th_divx_<n> stub loads its table address into %g1 (with %g7 as the
 * SETX scratch register) and branches to all_threads2, with a NOP in the
 * delay slot.
 */
!****************************************
! Operand2 as a register: SDIVX, UDIVX
!****************************************
rdth_id ! get thid in %o1
cmp %o1,0
be th_divx_0
cmp %o1,1 ! delay slot of the BE above; sets flags for the next BE
be th_divx_1
cmp %o1,2 ! delay slot of the BE above; sets flags for the next BE
be th_divx_2
nop
ba th_divx_3 ! if there are more than 4 threads...
nop
th_divx_0:
setx divx_data_t0,%g7,%g1
ba all_threads2
nop
th_divx_1:
setx divx_data_t1,%g7,%g1
ba all_threads2
nop
th_divx_2:
setx divx_data_t2,%g7,%g1
ba all_threads2
nop
th_divx_3:
setx divx_data_t3,%g7,%g1
ba all_threads2 ! target is the very next label; keeps all four stubs uniform
nop
all_threads2:
/*
 * Register-operand 64-bit divide pass, shared by all threads. On entry %g1
 * holds the address of the divide operand table chosen by the th_divx_<n>
 * stub.
 *   %g1 = pointer to the current operand pair (advanced 16 bytes per pass)
 *   %g2 = number of operand pairs still to process
 *   %l1, %l2 = the two 64-bit operands of the current pair
 * Quotients land in %l3-%l6 and are not compared here; the file header says
 * result checking is left to SAS.
 */
add %g0,NUM_DIV_CASES,%g2
divx_loop1:
ldx [%g1],%l1 ! first operand of the pair
ldx [%g1+8],%l2 ! second operand of the pair
wr %g0,%g0,%ccr ! ccr clear
sdivx %l1,%l2,%l3 ! signed l1 / l2
udivx %l1,%l2,%l4 ! unsigned l1 / l2
sdivx %l2,%l1,%l5 ! use each operand as divisor and dividend
udivx %l2,%l1,%l6
sub %g2,0x1,%g2 ! one pair done
brnz,pt %g2,divx_loop1 ! loop while pairs remain
! delay slot (not annulled): also executes on the final, untaken pass
add %g1,0x10,%g1 ! move operand pointer
答案 0 :(得分:1)
我认为一般来说,要得到动态指令计数,唯一的办法是在一个能够统计指令数的插桩(instrumented)环境中实际运行代码(例如模拟器,或带有执行计数器的CPU)。
我认为,要写出一个能够可靠地算出另一个程序将执行多少条指令的程序,等价于解决停机问题(Halting Problem)。
在许多实际情况中,循环之后的分支并不依赖于循环计算的结果。如果循环的结束条件很简单,例如把计数器递增到某个上限,那么可以直接把"迭代次数 × 每次迭代的指令数"加到总数中,然后继续往下分析。
这不是一个简单的问题;其复杂度大概相当于编写一个以汇编代码作为输入的优化编译器。您需要分析数据流来确定哪些分支会被执行,而对于某些代码,您别无选择,只能去模拟实际发生的情况。
你最好的选择几乎肯定是像@Leo所说的那样,找一个模拟器/仿真器(simulator/emulator)。