~bkerensa/ubuntu/raring/valgrind/merge-from-deb

« back to all changes in this revision

Viewing changes to callgrind/sim.c

Committer: Bazaar Package Importer
Author(s): Sebastian Dröge
Date: 2006-06-26 00:17:17 UTC
mto: This revision was merged to the branch mainline in revision 11.
Revision ID: james.westby@ubuntu.com-20060626001717-qi51nzty57cb12q6

Tags: upstream-3.2.0

Import upstream version 3.2.0

files added:
VEX/priv/guest-ppc

VEX/priv/guest-ppc/gdefs.h

VEX/priv/guest-ppc/ghelpers.c

VEX/priv/guest-ppc/toIR.c

VEX/priv/host-ppc

VEX/priv/host-ppc/hdefs.c

VEX/priv/host-ppc/hdefs.h

VEX/priv/host-ppc/isel.c

VEX/pub/libvex_guest_ppc64.h

auxprogs/libmpiwrap.c

auxprogs/mpiwrap_type_test.c

auxprogs/ppc64shifts.c

auxprogs/ppcfround.c

cachegrind/cg-ppc64.c

cachegrind/tests/ppc64

cachegrind/tests/ppc64/Makefile.am

cachegrind/tests/ppc64/Makefile.in

cachegrind/tests/wrap5.c

cachegrind/tests/wrap5.stderr.exp

cachegrind/tests/wrap5.stdout.exp

cachegrind/tests/wrap5.vgtest

callgrind

callgrind/Makefile.am

callgrind/Makefile.in

callgrind/bb.c

callgrind/bbcc.c

callgrind/callgrind.h

callgrind/callgrind_annotate.in

callgrind/callgrind_control.in

callgrind/callstack.c

callgrind/clo.c

callgrind/command.c

callgrind/context.c

callgrind/costs.c

callgrind/costs.h

callgrind/debug.c

callgrind/docs

callgrind/docs/Makefile.am

callgrind/docs/Makefile.in

callgrind/docs/cl-entities.xml

callgrind/docs/cl-format.xml

callgrind/docs/cl-manual.xml

callgrind/docs/index.xml

callgrind/docs/man-annotate.xml

callgrind/docs/man-callgrind.xml

callgrind/docs/man-control.xml

callgrind/dump.c

callgrind/events.c

callgrind/events.h

callgrind/fn.c

callgrind/global.h

callgrind/jumps.c

callgrind/main.c

callgrind/sim.c

callgrind/tests

callgrind/tests/Makefile.am

callgrind/tests/Makefile.in

callgrind/tests/clreq.c

callgrind/tests/clreq.stderr.exp

callgrind/tests/clreq.vgtest

callgrind/tests/filter_stderr

callgrind/tests/simwork.c

callgrind/tests/simwork1.stderr.exp

callgrind/tests/simwork1.stdout.exp

callgrind/tests/simwork1.vgtest

callgrind/tests/simwork2.stderr.exp

callgrind/tests/simwork2.stdout.exp

callgrind/tests/simwork2.vgtest

callgrind/tests/simwork3.stderr.exp

callgrind/tests/simwork3.stdout.exp

callgrind/tests/simwork3.vgtest

callgrind/tests/threads.c

callgrind/tests/threads.stderr.exp

callgrind/tests/threads.vgtest

callgrind/threads.c

coregrind/m_coredump/coredump-ppc64-linux.c

coregrind/m_debuginfo/README.txt

coregrind/m_debuginfo/UNUSED_STABS.txt

coregrind/m_debuginfo/debuginfo.c

coregrind/m_debuginfo/priv_readdwarf.h

coregrind/m_debuginfo/priv_readelf.h

coregrind/m_debuginfo/priv_readstabs.h

coregrind/m_debuginfo/priv_storage.h

coregrind/m_debuginfo/readdwarf.c

coregrind/m_debuginfo/readelf.c

coregrind/m_debuginfo/readstabs.c

coregrind/m_debuginfo/storage.c

coregrind/m_dispatch/dispatch-ppc64-linux.S

coregrind/m_sigframe/sigframe-ppc64-linux.c

coregrind/m_syswrap/syscall-ppc64-linux.S

coregrind/m_syswrap/syswrap-ppc64-linux.c

coregrind/pub_core_clreq.h

coregrind/vki_unistd-ppc64-linux.h

docs/html/QuickStart.html

docs/html/cl-format.html

docs/html/cl-manual.html

docs/html/faq.html

docs/internals/3_1_BUGSTATUS.txt

docs/internals/mpi2entries.txt

docs/internals/register-uses.txt

docs/lib/vg-html-common.xsl

docs/lib/vg-html-website.xsl

include/pub_tool_clreq.h

include/vki-ppc64-linux.h

include/vki_posixtypes-ppc64-linux.h

memcheck/mc_leakcheck.c

memcheck/mc_malloc_wrappers.c

memcheck/mc_replace_strmem.c

memcheck/tests/amd64/bt_everything.c

memcheck/tests/amd64/bt_everything.stderr.exp

memcheck/tests/amd64/bt_everything.stdout.exp

memcheck/tests/amd64/bt_everything.vgtest

memcheck/tests/amd64/more_x87_fp.c

memcheck/tests/amd64/more_x87_fp.stderr.exp

memcheck/tests/amd64/more_x87_fp.stdout.exp

memcheck/tests/amd64/more_x87_fp.vgtest

memcheck/tests/amd64/sse_memory.c

memcheck/tests/amd64/sse_memory.stderr.exp

memcheck/tests/amd64/sse_memory.stdout.exp

memcheck/tests/amd64/sse_memory.vgtest

memcheck/tests/deep_templates.cpp

memcheck/tests/deep_templates.stderr.exp

memcheck/tests/deep_templates.stdout.exp

memcheck/tests/deep_templates.vgtest

memcheck/tests/metadata.vgtest

memcheck/tests/partiallydefinedeq.stderr.exp2

memcheck/tests/pdb-realloc.c

memcheck/tests/pdb-realloc.stderr.exp

memcheck/tests/pdb-realloc.vgtest

memcheck/tests/pdb-realloc2.c

memcheck/tests/pdb-realloc2.stderr.exp

memcheck/tests/pdb-realloc2.stdout.exp

memcheck/tests/pdb-realloc2.vgtest

memcheck/tests/ppc64

memcheck/tests/ppc64/Makefile.am

memcheck/tests/ppc64/Makefile.in

memcheck/tests/sh-mem-random.c

memcheck/tests/sh-mem-random.stderr.exp

memcheck/tests/sh-mem-random.stdout.exp

memcheck/tests/sh-mem-random.stdout.exp64

memcheck/tests/sh-mem-random.vgtest

memcheck/tests/sh-mem.c

memcheck/tests/sh-mem.stderr.exp

memcheck/tests/sh-mem.vgtest

memcheck/tests/stack_changes.stdout.exp2

memcheck/tests/vcpu_bz2.c

memcheck/tests/vcpu_bz2.stderr.exp

memcheck/tests/vcpu_bz2.stdout.exp

memcheck/tests/vcpu_bz2.vgtest

memcheck/tests/vcpu_fbench.c

memcheck/tests/vcpu_fbench.stderr.exp

memcheck/tests/vcpu_fbench.stdout.exp

memcheck/tests/vcpu_fbench.vgtest

memcheck/tests/vcpu_fnfns.c

memcheck/tests/vcpu_fnfns.stderr.exp

memcheck/tests/vcpu_fnfns.stdout.exp

memcheck/tests/vcpu_fnfns.vgtest

memcheck/tests/wrap1.c

memcheck/tests/wrap1.stderr.exp

memcheck/tests/wrap1.stdout.exp

memcheck/tests/wrap1.vgtest

memcheck/tests/wrap2.c

memcheck/tests/wrap2.stderr.exp

memcheck/tests/wrap2.stdout.exp

memcheck/tests/wrap2.vgtest

memcheck/tests/wrap3.c

memcheck/tests/wrap3.stderr.exp

memcheck/tests/wrap3.stdout.exp

memcheck/tests/wrap3.vgtest

memcheck/tests/wrap4.c

memcheck/tests/wrap4.stderr.exp

memcheck/tests/wrap4.stdout.exp

memcheck/tests/wrap4.vgtest

memcheck/tests/wrap5.c

memcheck/tests/wrap5.stderr.exp

memcheck/tests/wrap5.stdout.exp

memcheck/tests/wrap5.vgtest

memcheck/tests/wrap6.c

memcheck/tests/wrap6.stderr.exp

memcheck/tests/wrap6.stdout.exp

memcheck/tests/wrap6.vgtest

memcheck/tests/wrap7.c

memcheck/tests/wrap7.stderr.exp

memcheck/tests/wrap7.stdout.exp

memcheck/tests/wrap7.vgtest

memcheck/tests/wrap7so.c

memcheck/tests/wrap8.c

memcheck/tests/wrap8.stderr.exp

memcheck/tests/wrap8.stderr.exp2

memcheck/tests/wrap8.stdout.exp

memcheck/tests/wrap8.stdout.exp2

memcheck/tests/wrap8.vgtest

memcheck/tests/x86/fprem.c

memcheck/tests/x86/fprem.stderr.exp

memcheck/tests/x86/fprem.stdout.exp

memcheck/tests/x86/fprem.vgtest

memcheck/tests/x86/more_x86_fp.c

memcheck/tests/x86/more_x86_fp.stderr.exp

memcheck/tests/x86/more_x86_fp.stdout.exp

memcheck/tests/x86/more_x86_fp.vgtest

memcheck/tests/x86/pushpopmem.c

memcheck/tests/x86/pushpopmem.stderr.exp

memcheck/tests/x86/pushpopmem.stdout.exp

memcheck/tests/x86/pushpopmem.vgtest

memcheck/tests/x86/sse1_memory.stderr.exp

memcheck/tests/x86/sse1_memory.stdout.exp

memcheck/tests/x86/sse1_memory.vgtest

memcheck/tests/x86/sse2_memory.stderr.exp

memcheck/tests/x86/sse2_memory.stdout.exp

memcheck/tests/x86/sse2_memory.vgtest

memcheck/tests/x86/sse_memory.c

none/tests/amd64/clc.c

none/tests/amd64/clc.stderr.exp

none/tests/amd64/clc.stdout.exp

none/tests/amd64/clc.vgtest

none/tests/amd64/faultstatus.disabled

none/tests/amd64/fcmovnu.c

none/tests/amd64/fcmovnu.stderr.exp

none/tests/amd64/fcmovnu.stdout.exp

none/tests/amd64/fcmovnu.vgtest

none/tests/amd64/fxtract.c

none/tests/amd64/fxtract.stderr.exp

none/tests/amd64/fxtract.stdout.exp

none/tests/amd64/fxtract.vgtest

none/tests/fdleak_cmsg.stderr.exp4

none/tests/mremap2.stdout.exp2

none/tests/ppc32/jm-fp.stderr.exp

none/tests/ppc32/jm-fp.stdout.exp

none/tests/ppc32/jm-fp.vgtest

none/tests/ppc32/jm-int.stderr.exp

none/tests/ppc32/jm-int.stdout.exp

none/tests/ppc32/jm-int.vgtest

none/tests/ppc32/ldstrev.c

none/tests/ppc32/ldstrev.stderr.exp

none/tests/ppc32/ldstrev.stdout.exp

none/tests/ppc32/ldstrev.vgtest

none/tests/ppc32/mftocrf.c

none/tests/ppc32/mftocrf.stderr.exp

none/tests/ppc32/mftocrf.stdout.exp

none/tests/ppc32/mftocrf.vgtest

none/tests/ppc32/round.c

none/tests/ppc32/round.stderr.exp

none/tests/ppc32/round.stdout.exp

none/tests/ppc32/round.vgtest

none/tests/ppc32/test_fx.c

none/tests/ppc32/test_fx.stderr.exp

none/tests/ppc32/test_fx.stdout.exp

none/tests/ppc32/test_fx.vgtest

none/tests/ppc32/test_gx.c

none/tests/ppc32/test_gx.stderr.exp

none/tests/ppc32/test_gx.stdout.exp

none/tests/ppc32/test_gx.vgtest

none/tests/ppc32/twi.c

none/tests/ppc32/twi.stderr.exp

none/tests/ppc32/twi.stdout.exp

none/tests/ppc32/twi.vgtest

none/tests/ppc32/xlc_dbl_u32.c

none/tests/ppc32/xlc_dbl_u32.stderr.exp

none/tests/ppc32/xlc_dbl_u32.stdout.exp

none/tests/ppc32/xlc_dbl_u32.vgtest

none/tests/ppc64

none/tests/ppc64/Makefile.am

none/tests/ppc64/Makefile.in

none/tests/ppc64/filter_stderr

none/tests/ppc64/jm-fp.stderr.exp

none/tests/ppc64/jm-fp.stdout.exp

none/tests/ppc64/jm-fp.vgtest

none/tests/ppc64/jm-insns.c

none/tests/ppc64/jm-int.stderr.exp

none/tests/ppc64/jm-int.stdout.exp

none/tests/ppc64/jm-int.vgtest

none/tests/ppc64/jm-vmx.stderr.exp

none/tests/ppc64/jm-vmx.stdout.exp

none/tests/ppc64/jm-vmx.vgtest

none/tests/ppc64/lsw.c

none/tests/ppc64/lsw.stderr.exp

none/tests/ppc64/lsw.stdout.exp

none/tests/ppc64/lsw.vgtest

none/tests/ppc64/round.c

none/tests/ppc64/round.stderr.exp

none/tests/ppc64/round.stdout.exp

none/tests/ppc64/round.vgtest

none/tests/ppc64/twi_tdi.c

none/tests/ppc64/twi_tdi.stderr.exp

none/tests/ppc64/twi_tdi.stdout.exp

none/tests/ppc64/twi_tdi.vgtest

none/tests/x86/faultstatus.disabled

none/tests/x86/fcmovnu.c

none/tests/x86/fcmovnu.stderr.exp

none/tests/x86/fcmovnu.stdout.exp

none/tests/x86/fcmovnu.vgtest

none/tests/x86/int.disabled

none/tests/x86/movx.c

none/tests/x86/movx.stderr.exp

none/tests/x86/movx.stdout.exp

none/tests/x86/movx.vgtest

none/tests/x86/yield.disabled

perf

perf/Makefile.am

perf/Makefile.in

perf/README

perf/bigcode.c

perf/bigcode1.vgperf

perf/bigcode2.vgperf

perf/bz2.c

perf/bz2.vgperf

perf/fbench.c

perf/fbench.vgperf

perf/ffbench.c

perf/ffbench.vgperf

perf/heap.c

perf/heap.vgperf

perf/sarp.c

perf/sarp.vgperf

perf/test_input_for_tinycc.c

perf/tinycc.c

perf/tinycc.vgperf

perf/vg_perf

perf/vg_perf.in

files removed:
VEX/priv/guest-ppc32

VEX/priv/guest-ppc32/gdefs.h

VEX/priv/guest-ppc32/ghelpers.c

VEX/priv/guest-ppc32/toIR.c

VEX/priv/host-ppc32

VEX/priv/host-ppc32/hdefs.c

VEX/priv/host-ppc32/hdefs.h

VEX/priv/host-ppc32/isel.c

addrcheck

addrcheck/Makefile.am

addrcheck/Makefile.in

addrcheck/ac_main.c

addrcheck/docs

addrcheck/docs/Makefile.am

addrcheck/docs/Makefile.in

addrcheck/docs/ac-manual.xml

addrcheck/tests

addrcheck/tests/Makefile.am

addrcheck/tests/Makefile.in

addrcheck/tests/addressable.stderr.exp

addrcheck/tests/addressable.stdout.exp

addrcheck/tests/addressable.vgtest

addrcheck/tests/badrw.stderr.exp

addrcheck/tests/badrw.vgtest

addrcheck/tests/filter_stderr

addrcheck/tests/fprw.stderr.exp

addrcheck/tests/fprw.vgtest

addrcheck/tests/overlap.stderr.exp

addrcheck/tests/overlap.stdout.exp

addrcheck/tests/overlap.vgtest

addrcheck/tests/toobig-allocs.stderr.exp

addrcheck/tests/toobig-allocs.vgtest

coregrind/coregrind.h

coregrind/m_aspacemgr/read_procselfmaps.c

coregrind/m_debuginfo/dwarf.c

coregrind/m_debuginfo/priv_symtab.h

coregrind/m_debuginfo/priv_symtypes.h

coregrind/m_debuginfo/stabs.c

coregrind/m_debuginfo/symtab.c

coregrind/m_debuginfo/symtypes.c

coregrind/m_profile.c

coregrind/pub_core_profile.h

docs/html/ac-manual.html

docs/html/bk01ar01.html

docs/html/bk03ar01.html

docs/internals/64-bit-cleanness.txt

include/pub_tool_profile.h

memcheck/mac_leakcheck.c

memcheck/mac_malloc_wrappers.c

memcheck/mac_replace_strmem.c

memcheck/mac_shared.c

memcheck/mac_shared.h

memcheck/tests/metadata.vgtest-HIDING

none/tests/amd64/faultstatus.vgtest

none/tests/ppc32/jm-insns.stderr.exp

none/tests/ppc32/jm-insns.stdout.exp

none/tests/ppc32/jm-insns.vgtest

none/tests/x86/faultstatus.vgtest

none/tests/x86/int.vgtest

none/tests/x86/yield.vgtest

files modified:
AUTHORS

FAQ.txt

Makefile.am

Makefile.core.am

Makefile.flags.am

Makefile.in

Makefile.install.am

Makefile.tool-flags.am

Makefile.tool.am

NEWS

README

README_DEVELOPERS

VEX/LICENSE.README

VEX/Makefile

VEX/auxprogs/genoffsets.c

VEX/priv/guest-amd64/gdefs.h

VEX/priv/guest-amd64/ghelpers.c

VEX/priv/guest-amd64/toIR.c

VEX/priv/guest-arm/gdefs.h

VEX/priv/guest-arm/ghelpers.c

VEX/priv/guest-arm/toIR.c

VEX/priv/guest-generic/bb_to_IR.c

VEX/priv/guest-generic/bb_to_IR.h

VEX/priv/guest-generic/g_generic_x87.c

VEX/priv/guest-generic/g_generic_x87.h

VEX/priv/guest-x86/gdefs.h

VEX/priv/guest-x86/ghelpers.c

VEX/priv/guest-x86/toIR.c

VEX/priv/host-amd64/hdefs.c

VEX/priv/host-amd64/hdefs.h

VEX/priv/host-amd64/isel.c

VEX/priv/host-arm/hdefs.c

VEX/priv/host-arm/hdefs.h

VEX/priv/host-arm/isel.c

VEX/priv/host-generic/h_generic_regs.c

VEX/priv/host-generic/h_generic_regs.h

VEX/priv/host-generic/h_generic_simd64.c

VEX/priv/host-generic/h_generic_simd64.h

VEX/priv/host-generic/reg_alloc2.c

VEX/priv/host-x86/hdefs.c

VEX/priv/host-x86/hdefs.h

VEX/priv/host-x86/isel.c

VEX/priv/ir/irdefs.c

VEX/priv/ir/irmatch.c

VEX/priv/ir/irmatch.h

VEX/priv/ir/iropt.c

VEX/priv/ir/iropt.h

VEX/priv/main/vex_globals.c

VEX/priv/main/vex_globals.h

VEX/priv/main/vex_main.c

VEX/priv/main/vex_svnversion.h

VEX/priv/main/vex_util.c

VEX/priv/main/vex_util.h

VEX/pub/libvex.h

VEX/pub/libvex_basictypes.h

VEX/pub/libvex_emwarn.h

VEX/pub/libvex_guest_amd64.h

VEX/pub/libvex_guest_arm.h

VEX/pub/libvex_guest_offsets.h

VEX/pub/libvex_guest_ppc32.h

VEX/pub/libvex_guest_x86.h

VEX/pub/libvex_ir.h

VEX/pub/libvex_trc_values.h

auxprogs/Makefile.am

auxprogs/Makefile.in

auxprogs/gen-mdg

auxprogs/gsl16test

auxprogs/valgrind-listener.c

cachegrind/Makefile.am

cachegrind/Makefile.in

cachegrind/cg-amd64.c

cachegrind/cg-ppc32.c

cachegrind/cg-x86.c

cachegrind/cg_annotate.in

cachegrind/cg_arch.h

cachegrind/cg_main.c

cachegrind/cg_sim.c

cachegrind/docs/Makefile.in

cachegrind/docs/cg-manual.xml

cachegrind/tests/Makefile.am

cachegrind/tests/Makefile.in

cachegrind/tests/amd64/Makefile.am

cachegrind/tests/amd64/Makefile.in

cachegrind/tests/filter_stderr

cachegrind/tests/ppc32/Makefile.am

cachegrind/tests/ppc32/Makefile.in

cachegrind/tests/x86/Makefile.in

config.h.in

configure

configure.in

coregrind/Makefile.am

coregrind/Makefile.in

coregrind/launcher.c

coregrind/m_aspacemgr/aspacemgr.c

coregrind/m_clientstate.c

coregrind/m_commandline.c

coregrind/m_coredump/coredump-amd64-linux.c

coregrind/m_coredump/coredump-elf.c

coregrind/m_coredump/coredump-ppc32-linux.c

coregrind/m_coredump/coredump-x86-linux.c

coregrind/m_coredump/priv_elf.h

coregrind/m_cpuid.S

coregrind/m_debugger.c

coregrind/m_debuglog.c

coregrind/m_demangle/demangle.c

coregrind/m_dispatch/dispatch-amd64-linux.S

coregrind/m_dispatch/dispatch-ppc32-linux.S

coregrind/m_dispatch/dispatch-x86-linux.S

coregrind/m_errormgr.c

coregrind/m_execontext.c

coregrind/m_hashtable.c

coregrind/m_libcassert.c

coregrind/m_libcbase.c

coregrind/m_libcfile.c

coregrind/m_libcprint.c

coregrind/m_libcproc.c

coregrind/m_libcsignal.c

coregrind/m_machine.c

coregrind/m_main.c

coregrind/m_mallocfree.c

coregrind/m_options.c

coregrind/m_oset.c

coregrind/m_redir.c

coregrind/m_replacemalloc/replacemalloc_core.c

coregrind/m_replacemalloc/vg_replace_malloc.c

coregrind/m_scheduler/priv_sema.h

coregrind/m_scheduler/scheduler.c

coregrind/m_scheduler/sema.c

coregrind/m_sigframe/sigframe-amd64-linux.c

coregrind/m_sigframe/sigframe-ppc32-linux.c

coregrind/m_sigframe/sigframe-x86-linux.c

coregrind/m_signals.c

coregrind/m_stacks.c

coregrind/m_stacktrace.c

coregrind/m_syscall.c

coregrind/m_syswrap/priv_syswrap-generic.h

coregrind/m_syswrap/priv_syswrap-linux-variants.h

coregrind/m_syswrap/priv_syswrap-linux.h

coregrind/m_syswrap/priv_syswrap-main.h

coregrind/m_syswrap/priv_types_n_macros.h

coregrind/m_syswrap/syscall-amd64-linux.S

coregrind/m_syswrap/syscall-ppc32-linux.S

coregrind/m_syswrap/syscall-x86-linux.S

coregrind/m_syswrap/syswrap-amd64-linux.c

coregrind/m_syswrap/syswrap-generic.c

coregrind/m_syswrap/syswrap-linux-variants.c

coregrind/m_syswrap/syswrap-linux.c

coregrind/m_syswrap/syswrap-main.c

coregrind/m_syswrap/syswrap-ppc32-linux.c

coregrind/m_syswrap/syswrap-x86-linux.c

coregrind/m_threadmodel.c

coregrind/m_threadstate.c

coregrind/m_tooliface.c

coregrind/m_trampoline.S

coregrind/m_translate.c

coregrind/m_transtab.c

coregrind/m_ume.c

coregrind/pub_core_aspacemgr.h

coregrind/pub_core_basics.h

coregrind/pub_core_basics_asm.h

coregrind/pub_core_clientstate.h

coregrind/pub_core_commandline.h

coregrind/pub_core_coredump.h

coregrind/pub_core_cpuid.h

coregrind/pub_core_debugger.h

coregrind/pub_core_debuginfo.h

coregrind/pub_core_debuglog.h

coregrind/pub_core_demangle.h

coregrind/pub_core_dispatch.h

coregrind/pub_core_dispatch_asm.h

coregrind/pub_core_errormgr.h

coregrind/pub_core_execontext.h

coregrind/pub_core_hashtable.h

coregrind/pub_core_libcassert.h

coregrind/pub_core_libcbase.h

coregrind/pub_core_libcfile.h

coregrind/pub_core_libcprint.h

coregrind/pub_core_libcproc.h

coregrind/pub_core_libcsignal.h

coregrind/pub_core_machine.h

coregrind/pub_core_mallocfree.h

coregrind/pub_core_options.h

coregrind/pub_core_oset.h

coregrind/pub_core_redir.h

coregrind/pub_core_replacemalloc.h

coregrind/pub_core_scheduler.h

coregrind/pub_core_sigframe.h

coregrind/pub_core_signals.h

coregrind/pub_core_stacks.h

coregrind/pub_core_stacktrace.h

coregrind/pub_core_syscall.h

coregrind/pub_core_syswrap.h

coregrind/pub_core_threadmodel.h

coregrind/pub_core_threadstate.h

coregrind/pub_core_tooliface.h

coregrind/pub_core_trampoline.h

coregrind/pub_core_translate.h

coregrind/pub_core_transtab.h

coregrind/pub_core_transtab_asm.h

coregrind/pub_core_ume.h

coregrind/vg_preloaded.c

coregrind/vki_unistd-amd64-linux.h

coregrind/vki_unistd-ppc32-linux.h

coregrind/vki_unistd-x86-linux.h

coregrind/vki_unistd.h

default.supp

docs/Makefile.am

docs/Makefile.in

docs/README

docs/html/FAQ.html

docs/html/cg-manual.html

docs/html/cg-tech-docs.html

docs/html/dist.authors.html

docs/html/dist.html

docs/html/dist.news.html

docs/html/dist.readme-developers.html

docs/html/dist.readme.html

docs/html/hg-manual.html

docs/html/index.html

docs/html/lk-manual.html

docs/html/manual-core.html

docs/html/manual-intro.html

docs/html/manual.html

docs/html/mc-manual.html

docs/html/mc-tech-docs.html

docs/html/ms-manual.html

docs/html/nl-manual.html

docs/html/quick-start.html

docs/html/tech-docs.html

docs/html/vg_basic.css

docs/html/writing-tools.html

docs/images/Makefile.in

docs/index.pdf

docs/index.ps

docs/internals/Makefile.am

docs/internals/Makefile.in

docs/internals/release-HOWTO.txt

docs/internals/roadmap.txt

docs/internals/xml-output.txt

docs/lib/Makefile.am

docs/lib/Makefile.in

docs/lib/line-wrap.xsl

docs/lib/vg-fo.xsl

docs/lib/vg-html-chunk.xsl

docs/lib/vg_basic.css

docs/valgrind.1

docs/xml/FAQ.xml

docs/xml/Makefile.in

docs/xml/dist-docs.xml

docs/xml/index.xml

docs/xml/licenses.xml

docs/xml/manual-core.xml

docs/xml/manual-intro.xml

docs/xml/manual.xml

docs/xml/quick-start-guide.xml

docs/xml/tech-docs.xml

docs/xml/vg-entities.xml

docs/xml/writing-tools.xml

docs/xml/xml_help.txt

glibc-2.2.supp

glibc-2.3.supp

glibc-2.4.supp

helgrind/Makefile.am

helgrind/Makefile.in

helgrind/docs/Makefile.in

helgrind/docs/hg-manual.xml

helgrind/helgrind.h

helgrind/hg_main.c

helgrind/tests/Makefile.am

helgrind/tests/Makefile.in

include/Makefile.am

include/Makefile.in

include/pub_tool_aspacemgr.h

include/pub_tool_basics.h

include/pub_tool_basics_asm.h

include/pub_tool_clientstate.h

include/pub_tool_cpuid.h

include/pub_tool_debuginfo.h

include/pub_tool_errormgr.h

include/pub_tool_execontext.h

include/pub_tool_hashtable.h

include/pub_tool_libcassert.h

include/pub_tool_libcbase.h

include/pub_tool_libcfile.h

include/pub_tool_libcprint.h

include/pub_tool_libcproc.h

include/pub_tool_libcsignal.h

include/pub_tool_machine.h

include/pub_tool_mallocfree.h

include/pub_tool_options.h

include/pub_tool_oset.h

include/pub_tool_redir.h

include/pub_tool_replacemalloc.h

include/pub_tool_signals.h

include/pub_tool_stacktrace.h

include/pub_tool_threadstate.h

include/pub_tool_tooliface.h

include/valgrind.h

include/vki-amd64-linux.h

include/vki-linux.h

include/vki-ppc32-linux.h

include/vki-x86-linux.h

include/vki_posixtypes-amd64-linux.h

include/vki_posixtypes-x86-linux.h

lackey/Makefile.am

lackey/Makefile.in

lackey/docs/Makefile.in

lackey/docs/lk-manual.xml

lackey/lk_main.c

lackey/tests/Makefile.in

massif/Makefile.am

massif/Makefile.in

massif/docs/Makefile.in

massif/docs/ms-manual.xml

massif/hp2ps/HpFile.c

massif/hp2ps/Main.c

massif/hp2ps/Main.h

massif/hp2ps/Makefile.am

massif/hp2ps/Makefile.in

massif/hp2ps/Shade.c

massif/hp2ps/Utilities.c

massif/ms_main.c

massif/tests/Makefile.am

massif/tests/Makefile.in

memcheck/Makefile.am

memcheck/Makefile.in

memcheck/docs/Makefile.in

memcheck/docs/mc-manual.xml

memcheck/docs/mc-tech-docs.xml

memcheck/mc_include.h

memcheck/mc_main.c

memcheck/mc_translate.c

memcheck/memcheck.h

memcheck/tests/Makefile.am

memcheck/tests/Makefile.in

memcheck/tests/addressable.c

memcheck/tests/amd64/Makefile.am

memcheck/tests/amd64/Makefile.in

memcheck/tests/amd64/insn_sse2.stdout.exp

memcheck/tests/badjump.c

memcheck/tests/badjump.stderr.exp

memcheck/tests/badjump2.c

memcheck/tests/badjump2.stderr.exp

memcheck/tests/badpoll.stderr.exp

memcheck/tests/buflen_check.stderr.exp2

memcheck/tests/clientperm.c

memcheck/tests/custom_alloc.c

memcheck/tests/execve.stderr.exp2

memcheck/tests/execve2.stderr.exp2

memcheck/tests/filter_stderr

memcheck/tests/fwrite.stderr.exp

memcheck/tests/leak-cycle.c

memcheck/tests/memcmptest.stderr.exp

memcheck/tests/memcmptest.stderr.exp2

memcheck/tests/mempool.c

memcheck/tests/metadata.c

memcheck/tests/metadata.stderr.exp

memcheck/tests/metadata.stdout.exp

memcheck/tests/nanoleak.supp

memcheck/tests/oset_test.c

memcheck/tests/overlap.stderr.exp

memcheck/tests/partiallydefinedeq.c

memcheck/tests/pointer-trace.c

memcheck/tests/pointer-trace.stderr.exp

memcheck/tests/post-syscall.c

memcheck/tests/ppc32/Makefile.am

memcheck/tests/ppc32/Makefile.in

memcheck/tests/sigprocmask.c

memcheck/tests/stack_changes.c

memcheck/tests/stack_switch.c

memcheck/tests/strchr.stderr.exp

memcheck/tests/strchr.stderr.exp2

memcheck/tests/supp.supp

memcheck/tests/supp2.stderr.exp

memcheck/tests/supp_unknown.stderr.exp

memcheck/tests/supp_unknown.supp

memcheck/tests/x86/Makefile.am

memcheck/tests/x86/Makefile.in

memcheck/tests/x86/scalar.c

memcheck/tests/x86/scalar_exit_group.stderr.exp

memcheck/tests/x86/tronical.S

memcheck/tests/x86/tronical.stderr.exp

memcheck/tests/xml1.stderr.exp

memcheck/tests/xml1.stderr.exp64

memcheck/tests/zeropage.c

none/Makefile.am

none/Makefile.in

none/docs/Makefile.in

none/docs/nl-manual.xml

none/nl_main.c

none/tests/Makefile.am

none/tests/Makefile.in

none/tests/amd64/Makefile.am

none/tests/amd64/Makefile.in

none/tests/amd64/insn_sse2.def

none/tests/amd64/insn_sse2.stdout.exp

none/tests/cmdline1.stdout.exp

none/tests/cmdline2.stdout.exp

none/tests/fdleak_creat.stderr.exp

none/tests/fdleak_dup.stderr.exp

none/tests/fdleak_dup2.stderr.exp

none/tests/fdleak_fcntl.stderr.exp

none/tests/fdleak_open.stderr.exp

none/tests/fdleak_pipe.stderr.exp

none/tests/fdleak_socketpair.stderr.exp

none/tests/fdleak_socketpair.stderr.exp2

none/tests/filter_fdleak

none/tests/ppc32/Makefile.am

none/tests/ppc32/Makefile.in

none/tests/ppc32/jm-insns.c

none/tests/ppc32/jm-vmx.stdout.exp

none/tests/x86/Makefile.am

none/tests/x86/Makefile.in

tests/Makefile.am

tests/Makefile.in

tests/cputest.c

tests/filter_libc

tests/vg_regtest

tests/vg_regtest.in

xfree-3.supp

xfree-4.supp

Show diffs side-by-side

added added

removed removed

callgrind/sim.c

/*--------------------------------------------------------------------*/

/*--- Cache simulation. ---*/

/*--- sim.c ---*/

/*--------------------------------------------------------------------*/

/*
This file is part of Callgrind.

njn25@cam.ac.uk

This program is free software; you can redistribute it and/or

modify it under the terms of the GNU General Public License as

published by the Free Software Foundation; either version 2 of the

License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but

WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

General Public License for more details.

You should have received a copy of the GNU General Public License

along with this program; if not, write to the Free Software

Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA

02111-1307, USA.

The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"

/* Notes:

- simulates a write-allocate cache

- (block --> set) hash function uses simple bit selection

- handling of references straddling two cache blocks:

- counts as only one cache access (not two)

- both blocks hit --> one hit

- one block hits, the other misses --> one miss

- both blocks miss --> one miss (not two)
*/

/* Cache configuration */

#include "cg_arch.h"

/* additional structures for cache use info, separated
 * according to usage frequency:
 * - line_loaded : pointer to cost center of instruction
 *   which loaded the line into cache.
 *   Needed to increment counters when line is evicted.
 * - line_use : updated on every access
 */

/* Per-cache-line usage record (the notes above describe it as
 * "updated on every access").  Both fields are reset to 0 by
 * cachesim_clearcache() when cache-use tracking is active. */
typedef struct {

UInt count; /* presumably an access counter for the line -- TODO confirm */

UInt mask; /* e.g. for 64Byte line size 1bit/2Byte */

} line_use;

/* Per-cache-line record of what loaded the line (per the notes above:
 * "pointer to cost center of instruction which loaded the line into
 * cache", needed to increment counters when the line is evicted).
 * All fields are reset to 0 by cachesim_clearcache(). */
typedef struct {

Addr memline, iaddr; /* presumably address of the cached memory line and of the loading instruction -- TODO confirm */

line_use* dep_use; /* point to higher-level cacheblock for this memline */

ULong* use_base; /* NOTE(review): looks like the base of the cost array to charge on eviction -- confirm */

} line_loaded;

/* Cache state */

/* State of one simulated cache.  The geometry fields (size, assoc,
 * line_size) are copied from the configuration by cachesim_initcache(),
 * which also computes the derived fields below. */
typedef struct {

char* name;

int size; /* bytes */

int assoc;

int line_size; /* bytes */

Bool sectored; /* prefetch nearside cacheline on read */

int sets; /* (size / line_size) / assoc */

int sets_min_1; /* sets - 1; used as a bit mask to select the set */

int assoc_bits; /* log2(assoc) */

int line_size_bits; /* log2(line_size) */

int tag_shift; /* line_size_bits + log2(sets) */

UWord tag_mask; /* all bits below tag_shift cleared, the rest set */

char desc_line[128]; /* human-readable description, filled by cachesim_initcache() */

UWord* tags; /* sets * assoc entries; entries of one set are adjacent */

/* for cache use */

int line_size_mask;

int* line_start_mask;

int* line_end_mask;

line_loaded* loaded; /* indexed like 'tags' (see cachesim_clearcache) */

line_use* use; /* 0 when cache-use collection is disabled */

} cache_t2;

/*
 * States of flat caches in our model.
 * We use a 2-level hierarchy.
 */

static cache_t2 I1, D1, L2;

/* Lower bits of cache tags are used as flags for a cache line */

#define CACHELINE_FLAGMASK (MIN_LINE_SIZE-1)

#define CACHELINE_DIRTY 1

100

101

102

/* Cache simulator Options */

103

/* Simulator option flags; every one starts out False. */
static Bool clo_simulate_writeback = False; /* presumably selects a write-back cache model -- TODO confirm */

104

static Bool clo_simulate_hwpref = False; /* presumably enables hardware-prefetch simulation -- TODO confirm */

105

static Bool clo_simulate_sectors = False; /* presumably enables sectored caches -- TODO confirm */

106

static Bool clo_collect_cacheuse = False; /* when set, cachesim_initcache() calls cacheuse_initcache() for the cache */

107

108

/* The following global vars are set up beforehand by
 * setup_bbcc()/cachesim_after_bbsetup():
 *
 * - Addr bb_base (instruction start address of original BB)
 * - ULong* cost_base (start of cost array for BB)
 * - BBCC* nonskipped (only != 0 when in a function not skipped)
 */

114

115

116

/* Offset to events in event set, used in log_* functions */

117

/* File-scope offsets without initialisers (so they start at 0); where
 * they receive their real values is not visible in this part of the file. */
static Int off_D0_Ir;

118

static Int off_D1r_Ir;

119

static Int off_D1r_Dr;

120

static Int off_D1w_Ir;

121

static Int off_D1w_Dw;

122

static Int off_D2_Ir;

123

static Int off_D2_Dr;

124

static Int off_D2_Dw;

125

126

static Addr bb_base; /* instruction start address of original BB (per the comment above) */

127

static ULong* cost_base; /* start of cost array for BB (per the comment above) */

128

static InstrInfo* current_ii; /* NOTE(review): presumably the instruction currently being simulated -- confirm */

129

130

/* Cache use offsets */

131

/* FIXME: The offsets are only correct because all eventsets get
 * the "Use" set added first !
 */

133

134

/* Fixed cache-use offsets: I1 and D1 share slots 0 (AcCost) and
 * 1 (SpLoss), while L2 uses slots 2 (AcCost) and 3 (SpLoss). */
static Int off_I1_AcCost = 0;

135

static Int off_I1_SpLoss = 1;

136

static Int off_D1_AcCost = 0;

137

static Int off_D1_SpLoss = 1;

138

static Int off_L2_AcCost = 2;

139

static Int off_L2_SpLoss = 3;

140

141

/* Cache access types */

142

/* Write deliberately has the same value as CACHELINE_DIRTY; presumably so
 * the access type can be used directly as a tag flag -- TODO confirm. */
typedef enum { Read = 0, Write = CACHELINE_DIRTY } RefType;

143

144

/* Result of a reference into a flat cache */

145

/* Hit is 0, Miss is 1, MissDirty is 2.  The write-through lookup
 * cachesim_setref() only ever returns Hit or Miss. */
typedef enum { Hit = 0, Miss, MissDirty } CacheResult;

146

147

/* Result of a reference into a hierarchical cache model */

148

/* Outcome of one access against the whole hierarchy.  Values are
 * numbered 0..3 in declaration order. */
typedef enum {

149

L1_Hit, /* returned by cachesim_I1_ref() when the I1 lookup hits */

150

L2_Hit, /* presumably: missed in L1 but hit in L2 -- TODO confirm */

151

MemAccess, /* presumably: missed in both levels -- TODO confirm */

152

WriteBackMemAccess } CacheModelResult;

153

154

/* Pointer to a simulator entry point taking (address, access size) and
 * returning the hierarchy-level result, e.g. cachesim_I1_ref(). */
typedef CacheModelResult (*simcall_type)(Addr, UChar);

155

156

/* Table of the simulator entry points in use, one function pointer per
 * access kind.  NOTE(review): presumably filled in according to the
 * clo_* options during initialisation -- confirm. */
static struct {

157

simcall_type I1_Read;

158

simcall_type D1_Read;

159

simcall_type D1_Write;

160

} simulator;

161

162

/*------------------------------------------------------------*/

163

/*--- Cache Simulator Initialization ---*/

164

/*------------------------------------------------------------*/

165

166

/* Reset every line of cache 'c' to its initial state.
 *
 * Without cache-use tracking (c->use == 0) all tags simply become 0.
 * With tracking, the per-line 'loaded' and 'use' records are zeroed as
 * well, and each tag is seeded with the line's index modulo the
 * associativity (the low tag bits act as a pointer).
 */
static void cachesim_clearcache(cache_t2* c)
{
   Int line;
   Int n_lines = c->sets * c->assoc;

   if (!c->use) {
      for (line = 0; line < n_lines; line++)
         c->tags[line] = 0;
      return;
   }

   for (line = 0; line < n_lines; line++) {
      line_loaded* origin = &c->loaded[line];
      line_use*    usage  = &c->use[line];

      origin->memline  = 0;
      origin->use_base = 0;
      origin->dep_use  = 0;
      origin->iaddr    = 0;

      usage->mask  = 0;
      usage->count = 0;

      c->tags[line] = line % c->assoc; /* init lower bits as pointer */
   }
}

184

185

static void cacheuse_initcache(cache_t2* c);

186

187

/* By this point, the size/assoc/line_size has been checked. */

188

/* Initialise the simulated cache 'c' from 'config'.
 *
 * Copies size/assoc/line_size (already validated by the caller, see the
 * comment above), derives the set count and the bit positions used to
 * split an address into line offset, set index and tag, formats the
 * description string, allocates the tag array and finally clears the
 * cache.  When clo_collect_cacheuse is set the cache-use structures
 * are set up via cacheuse_initcache(); otherwise c->use is left 0.
 */
static void cachesim_initcache(cache_t config, cache_t2* c)
{
   c->size      = config.size;
   c->assoc     = config.assoc;
   c->line_size = config.line_size;
   c->sectored  = False; // FIXME

   c->sets           = (c->size / c->line_size) / c->assoc;
   c->sets_min_1     = c->sets - 1;
   c->assoc_bits     = VG_(log2)(c->assoc);
   c->line_size_bits = VG_(log2)(c->line_size);
   c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);

   /* Build the mask in UWord width: shifting a plain int '1' is
    * undefined once tag_shift reaches the int's sign bit, and the upper
    * bits of a 64-bit UWord would only be set through sign extension. */
   c->tag_mask = ~((((UWord)1) << c->tag_shift) - 1);

   /* Can bits in tag entries be used for flags?
    * Should be always true as MIN_LINE_SIZE >= 16 */
   CLG_ASSERT( (c->tag_mask & CACHELINE_FLAGMASK) == 0);

   if (c->assoc == 1) {
      VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped%s",
                   c->size, c->line_size,
                   c->sectored ? ", sectored":"");
   } else {
      VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative%s",
                   c->size, c->line_size, c->assoc,
                   c->sectored ? ", sectored":"");
   }

   /* One tag word per cache line; the lines of a set are adjacent. */
   c->tags = (UWord*) CLG_MALLOC(sizeof(UWord) * c->sets * c->assoc);
   if (clo_collect_cacheuse)
      cacheuse_initcache(c);
   else
      c->use = 0;
   cachesim_clearcache(c);
}

223

224

225

#if 0
/* Debug helper (compiled out): prints the tag array of 'c', one set per
 * output line, with the ways of a set in storage order. */
static void print_cache(cache_t2* c)
{
   UInt row, col;
   UInt slot = 0;   /* running index into c->tags across all sets */

   for (row = 0; row < c->sets; row++) {
      col = 0;
      while (col < c->assoc) {
         VG_(printf)("%8x ", c->tags[slot]);
         slot++;
         col++;
      }
      VG_(printf)("\n");
   }
}
#endif

239

240

241

/*------------------------------------------------------------*/

242

/*--- Write Through Cache Simulation ---*/

243

/*------------------------------------------------------------*/

244

245

246

/*
 * Simple model: L1 & L2 Write Through
 * Does not distinguish among read and write references
 *
 * Simulator functions:
 * CacheModelResult cachesim_I1_ref(Addr a, UChar size)
 * CacheModelResult cachesim_D1_ref(Addr a, UChar size)
 */

252

253

254

static __inline__

255

CacheResult cachesim_setref(cache_t2* c, UInt set_no, UWord tag)

256

{

257

int i, j;

258

UWord *set;

259

260

/* Shifting is a bit faster than multiplying */

261

set = &(c->tags[set_no << c->assoc_bits]);

262

263

/* This loop is unrolled for just the first case, which is the most */

264

/* common. We can't unroll any further because it would screw up */

265

/* if we have a direct-mapped (1-way) cache. */

266

if (tag == set[0])

267

return Hit;

268

269

/* If the tag is one other than the MRU, move it into the MRU spot */

270

/* and shuffle the rest down. */

271

for (i = 1; i < c->assoc; i++) {

272

if (tag == set[i]) {

273

for (j = i; j > 0; j--) {

274

set[j] = set[j - 1];

275

}

276

set[0] = tag;

277

return Hit;

278

}

279

}

280

281

/* A miss; install this tag as MRU, shuffle rest down. */

282

for (j = c->assoc - 1; j > 0; j--) {

283

set[j] = set[j - 1];

284

}

285

set[0] = tag;

286

287

return Miss;

288

}

289

290

/* Simulate a reference of <size> bytes at address <a> in cache <c>
 * (write-through model: tags are stored as a >> tag_shift).
 *
 * An access lying in one cache line references one set. An access
 * straddling two lines references two neighbouring sets and counts as
 * Miss if either line misses. Anything else is a fatal error.
 *
 * Returns Hit or Miss; the cache state is updated as a side effect.
 */
static CacheResult cachesim_ref(cache_t2* c, Addr a, UChar size)
{
   Addr  last = a + size - 1;   /* address of the last byte accessed */
   UInt  set1 = (a    >> c->line_size_bits) & (c->sets_min_1);
   UInt  set2 = (last >> c->line_size_bits) & (c->sets_min_1);
   UWord tag  = a >> c->tag_shift;

   /* Access entirely within line. */
   if (set1 == set2)
      return cachesim_setref(c, set1, tag);

   /* Access straddles two lines. */
   /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
   if (((set1 + 1) & (c->sets-1)) == set2) {
      /* The second line has its own tag: when the access wraps from
       * the last set to set 0, the tag bits of <last> differ from
       * those of <a>. */
      UWord tag2 = last >> c->tag_shift;

      /* the calls update cache structures as side effect */
      CacheResult res1 = cachesim_setref(c, set1, tag);
      CacheResult res2 = cachesim_setref(c, set2, tag2);
      return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
   }

   /* <a> is an Addr, so print it with %p as the cache-use code does. */
   VG_(printf)("addr: %p size: %u sets: %d %d", a, size, set1, set2);
   VG_(tool_panic)("item straddles more than two cache sets");

   return Hit;
}

315

316

static

317

CacheModelResult cachesim_I1_ref(Addr a, UChar size)

318

{

319

if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;

320

if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;

321

return MemAccess;

322

}

323

324

static

325

CacheModelResult cachesim_D1_ref(Addr a, UChar size)

326

{

327

if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;

328

if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;

329

return MemAccess;

330

}

331

332

333

/*------------------------------------------------------------*/

334

/*--- Write Back Cache Simulation ---*/

335

/*------------------------------------------------------------*/

336

337

338

/*
 * More complex model: L1 Write-through, L2 Write-back
 * This needs to distinguish between read and write references.
 *
 * Simulator functions:
 *  CacheModelResult cachesim_I1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Read(Addr a, UChar size)
 *  CacheModelResult cachesim_D1_Write(Addr a, UChar size)
 */

/*
 * With write-back, the result can be a miss evicting a dirty line.
 * The dirty state of a cache line is stored in Bit0 of the tag for
 * this cache line (CACHELINE_DIRTY = 1). By OR'ing the reference
 * type (Read/Write), the line gets dirty on a write.
 */

352

353

static __inline__

354

CacheResult cachesim_setref_wb(cache_t2* c, RefType ref, UInt set_no, UWord tag)

355

{

356

int i, j;

357

UWord *set, tmp_tag;

358

359

/* Shifting is a bit faster than multiplying */

360

set = &(c->tags[set_no << c->assoc_bits]);

361

362

/* This loop is unrolled for just the first case, which is the most */

363

/* common. We can't unroll any further because it would screw up */

364

/* if we have a direct-mapped (1-way) cache. */

365

if (tag == (set[0] & ~CACHELINE_DIRTY)) {

366

set[0] |= ref;

367

return Hit;

368

}

369

/* If the tag is one other than the MRU, move it into the MRU spot */

370

/* and shuffle the rest down. */

371

for (i = 1; i < c->assoc; i++) {

372

if (tag == (set[i] & ~CACHELINE_DIRTY)) {

373

tmp_tag = set[i] | ref; // update dirty flag

374

for (j = i; j > 0; j--) {

375

set[j] = set[j - 1];

376

}

377

set[0] = tmp_tag;

378

return Hit;

379

}

380

}

381

382

/* A miss; install this tag as MRU, shuffle rest down. */

383

tmp_tag = set[c->assoc - 1];

384

for (j = c->assoc - 1; j > 0; j--) {

385

set[j] = set[j - 1];

386

}

387

set[0] = tag | ref;

388

389

return (tmp_tag & CACHELINE_DIRTY) ? MissDirty : Miss;

390

}

391

392

393

static __inline__

394

CacheResult cachesim_ref_wb(cache_t2* c, RefType ref, Addr a, UChar size)

395

{

396

UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);

397

UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);

398

UWord tag = a & c->tag_mask;

399

400

/* Access entirely within line. */

401

if (set1 == set2)

402

return cachesim_setref_wb(c, ref, set1, tag);

403

404

/* Access straddles two lines. */

405

/* Nb: this is a fast way of doing ((set1+1) % c->sets) */

406

else if (((set1 + 1) & (c->sets-1)) == set2) {

407

408

/* the call updates cache structures as side effect */

409

CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);

410

CacheResult res2 = cachesim_setref_wb(c, ref, set2, tag);

411

412

if ((res1 == MissDirty) || (res2 == MissDirty)) return MissDirty;

413

return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

414

415

} else {

416

VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);

417

VG_(tool_panic)("item straddles more than two cache sets");

418

}

419

return Hit;

420

}

421

422

423

static

424

CacheModelResult cachesim_I1_Read(Addr a, UChar size)

425

{

426

if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;

427

switch( cachesim_ref_wb( &L2, Read, a, size) ) {

428

case Hit: return L2_Hit;

429

case Miss: return MemAccess;

430

default: break;

431

}

432

return WriteBackMemAccess;

433

}

434

435

static

436

CacheModelResult cachesim_D1_Read(Addr a, UChar size)

437

{

438

if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;

439

switch( cachesim_ref_wb( &L2, Read, a, size) ) {

440

case Hit: return L2_Hit;

441

case Miss: return MemAccess;

442

default: break;

443

}

444

return WriteBackMemAccess;

445

}

446

447

static

448

CacheModelResult cachesim_D1_Write(Addr a, UChar size)

449

{

450

if ( cachesim_ref( &D1, a, size) == Hit ) {

451

/* Even for a L1 hit, the write-trough L1 passes

452

* the write to the L2 to make the L2 line dirty.

453

* But this causes no latency, so return the hit.

454

455

cachesim_ref_wb( &L2, Write, a, size);

456

return L1_Hit;

457

}

458

switch( cachesim_ref_wb( &L2, Write, a, size) ) {

459

case Hit: return L2_Hit;

460

case Miss: return MemAccess;

461

default: break;

462

}

463

return WriteBackMemAccess;

464

}

465

466

467

/*------------------------------------------------------------*/

468

/*--- Hardware Prefetch Simulation ---*/

469

/*------------------------------------------------------------*/

470

471

static ULong prefetch_up = 0;

472

static ULong prefetch_down = 0;

473

474

#define PF_STREAMS 8

475

#define PF_PAGEBITS 12

476

477

static UInt pf_lastblock[PF_STREAMS];

478

static Int pf_seqblocks[PF_STREAMS];

479

480

static

481

void prefetch_clear(void)

482

{

483

int i;

484

for(i=0;i<PF_STREAMS;i++)

485

pf_lastblock[i] = pf_seqblocks[i] = 0;

486

}

487

488

489

/*
 * HW Prefetch emulation
 * Start prefetching when detecting sequential access to 3 memory blocks.
 * One stream can be detected per 4k page.
 */

492

493

static __inline__

494

void prefetch_L2_doref(Addr a, UChar size)

495

{

496

UInt stream = (a >> PF_PAGEBITS) % PF_STREAMS;

497

UInt block = ( a >> L2.line_size_bits);

498

499

if (block != pf_lastblock[stream]) {

500

if (pf_seqblocks[stream] == 0) {

501

if (pf_lastblock[stream] +1 == block) pf_seqblocks[stream]++;

502

else if (pf_lastblock[stream] -1 == block) pf_seqblocks[stream]--;

503

}

504

else if (pf_seqblocks[stream] >0) {

505

if (pf_lastblock[stream] +1 == block) {

506

pf_seqblocks[stream]++;

507

if (pf_seqblocks[stream] >= 2) {

508

prefetch_up++;

509

cachesim_ref(&L2, a + 5 * L2.line_size,1);

510

}

511

}

512

else pf_seqblocks[stream] = 0;

513

}

514

else if (pf_seqblocks[stream] <0) {

515

if (pf_lastblock[stream] -1 == block) {

516

pf_seqblocks[stream]--;

517

if (pf_seqblocks[stream] <= -2) {

518

prefetch_down++;

519

cachesim_ref(&L2, a - 5 * L2.line_size,1);

520

}

521

}

522

else pf_seqblocks[stream] = 0;

523

}

524

pf_lastblock[stream] = block;

525

}

526

}

527

528

/* simple model with hardware prefetch */

529

530

static

531

CacheModelResult prefetch_I1_ref(Addr a, UChar size)

532

{

533

if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;

534

prefetch_L2_doref(a,size);

535

if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;

536

return MemAccess;

537

}

538

539

static

540

CacheModelResult prefetch_D1_ref(Addr a, UChar size)

541

{

542

if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;

543

prefetch_L2_doref(a,size);

544

if ( cachesim_ref( &L2, a, size) == Hit ) return L2_Hit;

545

return MemAccess;

546

}

547

548

549

/* complex model with hardware prefetch */

550

551

static

552

CacheModelResult prefetch_I1_Read(Addr a, UChar size)

553

{

554

if ( cachesim_ref( &I1, a, size) == Hit ) return L1_Hit;

555

prefetch_L2_doref(a,size);

556

switch( cachesim_ref_wb( &L2, Read, a, size) ) {

557

case Hit: return L2_Hit;

558

case Miss: return MemAccess;

559

default: break;

560

}

561

return WriteBackMemAccess;

562

}

563

564

static

565

CacheModelResult prefetch_D1_Read(Addr a, UChar size)

566

{

567

if ( cachesim_ref( &D1, a, size) == Hit ) return L1_Hit;

568

prefetch_L2_doref(a,size);

569

switch( cachesim_ref_wb( &L2, Read, a, size) ) {

570

case Hit: return L2_Hit;

571

case Miss: return MemAccess;

572

default: break;

573

}

574

return WriteBackMemAccess;

575

}

576

577

static

578

CacheModelResult prefetch_D1_Write(Addr a, UChar size)

579

{

580

prefetch_L2_doref(a,size);

581

if ( cachesim_ref( &D1, a, size) == Hit ) {

582

/* Even for a L1 hit, the write-trough L1 passes

583

* the write to the L2 to make the L2 line dirty.

584

* But this causes no latency, so return the hit.

585

586

cachesim_ref_wb( &L2, Write, a, size);

587

return L1_Hit;

588

}

589

switch( cachesim_ref_wb( &L2, Write, a, size) ) {

590

case Hit: return L2_Hit;

591

case Miss: return MemAccess;

592

default: break;

593

}

594

return WriteBackMemAccess;

595

}

596

597

598

/*------------------------------------------------------------*/

599

/*--- Cache Simulation with use metric collection ---*/

600

/*------------------------------------------------------------*/

601

602

/* can not be combined with write-back or prefetch */

603

604

static

605

void cacheuse_initcache(cache_t2* c)

606

{

607

int i;

608

unsigned int start_mask, start_val;

609

unsigned int end_mask, end_val;

610

611

c->use = CLG_MALLOC(sizeof(line_use) * c->sets * c->assoc);

612

c->loaded = CLG_MALLOC(sizeof(line_loaded) * c->sets * c->assoc);

613

c->line_start_mask = CLG_MALLOC(sizeof(int) * c->line_size);

614

c->line_end_mask = CLG_MALLOC(sizeof(int) * c->line_size);

615

616

617

c->line_size_mask = c->line_size-1;

618

619

/* Meaning of line_start_mask/line_end_mask

620

* Example: for a given cache line, you get an access starting at

621

* byte offset 5, length 4, byte 5 - 8 was touched. For a cache

622

* line size of 32, you have 1 bit per byte in the mask:

623

624

* bit31 bit8 bit5 bit 0

625

* | | | |

626

* 11..111111100000 line_start_mask[5]

627

* 00..000111111111 line_end_mask[(5+4)-1]

628

629

* use_mask |= line_start_mask[5] && line_end_mask[8]

630

631

632

start_val = end_val = ~0;

633

if (c->line_size < 32) {

634

int bits_per_byte = 32/c->line_size;

635

start_mask = (1<<bits_per_byte)-1;

636

end_mask = start_mask << (32-bits_per_byte);

637

for(i=0;i<c->line_size;i++) {

638

c->line_start_mask[i] = start_val;

639

start_val = start_val & ~start_mask;

640

start_mask = start_mask << bits_per_byte;

641

642

c->line_end_mask[c->line_size-i-1] = end_val;

643

end_val = end_val & ~end_mask;

644

end_mask = end_mask >> bits_per_byte;

645

}

646

}

647

else {

648

int bytes_per_bit = c->line_size/32;

649

start_mask = 1;

650

end_mask = 1 << 31;

651

for(i=0;i<c->line_size;i++) {

652

c->line_start_mask[i] = start_val;

653

c->line_end_mask[c->line_size-i-1] = end_val;

654

if ( ((i+1)%bytes_per_bit) == 0) {

655

start_val &= ~start_mask;

656

end_val &= ~end_mask;

657

start_mask <<= 1;

658

end_mask >>= 1;

659

}

660

}

661

}

662

663

CLG_DEBUG(6, "Config %s:\n", c->desc_line);

664

for(i=0;i<c->line_size;i++) {

665

CLG_DEBUG(6, " [%2d]: start mask %8x, end mask %8x\n",

666

i, c->line_start_mask[i], c->line_end_mask[i]);

667

}

668

669

/* We use lower tag bits as offset pointers to cache use info.

670

* I.e. some cache parameters don't work.

671

672

if (c->tag_shift < c->assoc_bits) {

673

VG_(message)(Vg_DebugMsg,

674

"error: Use associativity < %d for cache use statistics!",

675

(1<<c->tag_shift) );

676

VG_(tool_panic)("Unsupported cache configuration");

677

}

678

}

679

680

/* FIXME: A little tricky */

681

#if 0
/* Disabled draft of a function-based cache-use simulation.
 * NOTE(review): this does not compile as written -- cacheuse_setref()
 * uses 'a', 'size' and 'L' which are not in scope, and
 * cacheuse_update(), cacheuse_L2_miss() and cacheuse_isMiss() are not
 * defined in the visible code. The active implementation is the
 * CACHEUSE() macro below.
 */

/* Account one more hit (and the touched bytes <use_mask>) to the line
 * identified by set index <high_idx> and in-set offset <low_idx>. */
static __inline__
void cacheuse_update_hit(cache_t2* c, UInt high_idx, UInt low_idx, UInt use_mask)
{
   int idx = (high_idx << c->assoc_bits) | low_idx;

   c->use[idx].count ++;
   c->use[idx].mask |= use_mask;

   CLG_DEBUG(6," Hit [idx %d] (line %p from %p): %x => %08x, count %d\n",
             idx, c->loaded[idx].memline, c->loaded[idx].iaddr,
             use_mask, c->use[idx].mask, c->use[idx].count);
}

/* only used for I1, D1 */

static __inline__
CacheResult cacheuse_setref(cache_t2* c, UInt set_no, UWord tag)
{
   int i, j, idx;
   UWord *set, tmp_tag;
   UInt use_mask;

   /* Shifting is a bit faster than multiplying */
   set = &(c->tags[set_no << c->assoc_bits]);
   use_mask =
      c->line_start_mask[a & c->line_size_mask] &
      c->line_end_mask[(a+size-1) & c->line_size_mask];

   /* This loop is unrolled for just the first case, which is the most */
   /* common. We can't unroll any further because it would screw up */
   /* if we have a direct-mapped (1-way) cache. */
   if (tag == (set[0] & c->tag_mask)) {
      cacheuse_update(c, set_no, set[0] & ~c->tag_mask, use_mask);
      return L1_Hit;
   }

   /* If the tag is one other than the MRU, move it into the MRU spot */
   /* and shuffle the rest down. */
   for (i = 1; i < c->assoc; i++) {
      if (tag == (set[i] & c->tag_mask)) {
         tmp_tag = set[i];
         for (j = i; j > 0; j--) {
            set[j] = set[j - 1];
         }
         set[0] = tmp_tag;

         cacheuse_update(c, set_no, tmp_tag & ~c->tag_mask, use_mask);
         return L1_Hit;
      }
   }

   /* A miss; install this tag as MRU, shuffle rest down. */
   tmp_tag = set[L.assoc - 1] & ~c->tag_mask;
   for (j = c->assoc - 1; j > 0; j--) {
      set[j] = set[j - 1];
   }
   set[0] = tag | tmp_tag;

   cacheuse_L2_miss(c, (set_no << c->assoc_bits) | tmp_tag,
                    use_mask, a & ~c->line_size_mask);

   return Miss;
}


static CacheResult cacheuse_ref(cache_t2* c, Addr a, UChar size)
{
   UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
   UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
   UWord tag = a >> c->tag_shift;

   /* Access entirely within line. */
   if (set1 == set2)
      return cacheuse_setref(c, set1, tag);

   /* Access straddles two lines. */
   /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
   else if (((set1 + 1) & (c->sets-1)) == set2) {

      /* the call updates cache structures as side effect */
      CacheResult res1 = cacheuse_isMiss(c, set1, tag);
      CacheResult res2 = cacheuse_isMiss(c, set2, tag);
      return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;

   } else {
      VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);
      VG_(tool_panic)("item straddles more than two cache sets");
   }
   return Hit;
}
#endif

774

775

776

/* for I1/D1 caches */

777

/* CACHEUSE(L) defines cacheuse_<L>_doRead(a, size) for the L1 cache
 * object L (I1 or D1), simulating an access with cache-use tracking.
 *
 * Tags are compared under L.tag_mask; the bits outside the mask hold
 * the offset of the line's slot in L.use[] / L.loaded[], so that a
 * slot stays attached to its line while the set is reordered.
 * On a hit the slot's counter and use mask are updated and L1_Hit is
 * returned; on a miss the slot of the dropped line is handed to
 * update_<L>_use(), whose result is returned. For an access
 * straddling two lines both lines are handled, each with its own tag
 * (tag for the first line, tag2 for the second).
 *
 * The declarations of set1, set2 and tag are reconstructed from their
 * uses in the body (they follow cachesim_ref_wb / cacheuse_L2_access).
 */
#define CACHEUSE(L) \
\
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) \
{ \
   UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1); \
   UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
   UWord tag  = a & L.tag_mask; \
   UWord tag2; \
   int i, j, idx; \
   UWord *set, tmp_tag; \
   UInt use_mask; \
\
   CLG_DEBUG(6,"%s.Acc(Addr %p, size %d): Sets [%d/%d]\n", \
             L.name, a, size, set1, set2); \
\
   /* First case: word entirely within line. */ \
   if (set1 == set2) { \
\
      /* Shifting is a bit faster than multiplying */ \
      set = &(L.tags[set1 << L.assoc_bits]); \
      use_mask = L.line_start_mask[a & L.line_size_mask] & \
                 L.line_end_mask[(a+size-1) & L.line_size_mask]; \
\
      /* This loop is unrolled for just the first case, which is the most */ \
      /* common. We can't unroll any further because it would screw up */ \
      /* if we have a direct-mapped (1-way) cache. */ \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         return L1_Hit; \
      } \
      /* If the tag is one other than the MRU, move it into the MRU spot */ \
      /* and shuffle the rest down. */ \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return L1_Hit; \
         } \
      } \
\
      /* A miss; install this tag as MRU, shuffle rest down. */ \
      /* The new line takes over the slot offset of the dropped one. */ \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 << L.assoc_bits) | tmp_tag; \
      return update_##L##_use(&L, idx, \
                              use_mask, a &~ L.line_size_mask); \
\
   /* Second case: word straddles two lines. */ \
   /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
   } else if (((set1 + 1) & (L.sets-1)) == set2) { \
      Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \
\
      /* --- first line: from <a> to the end of its line --- */ \
      set = &(L.tags[set1 << L.assoc_bits]); \
      use_mask = L.line_start_mask[a & L.line_size_mask]; \
      if (tag == (set[0] & L.tag_mask)) { \
         idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         goto block2; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            goto block2; \
         } \
      } \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag | tmp_tag; \
      idx = (set1 << L.assoc_bits) | tmp_tag; \
      miss1 = update_##L##_use(&L, idx, \
                               use_mask, a &~ L.line_size_mask); \
block2: \
      /* --- second line: from its start to the last byte accessed --- */ \
      set = &(L.tags[set2 << L.assoc_bits]); \
      use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
      /* The second line has its own tag: it differs from <tag> when */ \
      /* the access wraps from the last set to set 0. */ \
      tag2 = (a+size-1) & L.tag_mask; \
      if (tag2 == (set[0] & L.tag_mask)) { \
         idx = (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
         L.use[idx].count ++; \
         L.use[idx].mask |= use_mask; \
         CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                   idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                   use_mask, L.use[idx].mask, L.use[idx].count); \
         return miss1; \
      } \
      for (i = 1; i < L.assoc; i++) { \
         if (tag2 == (set[i] & L.tag_mask)) { \
            tmp_tag = set[i]; \
            for (j = i; j > 0; j--) { \
               set[j] = set[j - 1]; \
            } \
            set[0] = tmp_tag; \
            idx = (set2 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
            L.use[idx].count ++; \
            L.use[idx].mask |= use_mask; \
            CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): %x => %08x, count %d\n", \
                      i, idx, L.loaded[idx].memline, L.loaded[idx].iaddr, \
                      use_mask, L.use[idx].mask, L.use[idx].count); \
            return miss1; \
         } \
      } \
      tmp_tag = set[L.assoc - 1] & ~L.tag_mask; \
      for (j = L.assoc - 1; j > 0; j--) { \
         set[j] = set[j - 1]; \
      } \
      set[0] = tag2 | tmp_tag; \
      idx = (set2 << L.assoc_bits) | tmp_tag; \
      miss2 = update_##L##_use(&L, idx, \
                               use_mask, (a+size-1) &~ L.line_size_mask); \
      return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \
\
   } else { \
      VG_(printf)("addr: %p size: %u sets: %d %d", a, size, set1, set2); \
      VG_(tool_panic)("item straddles more than two cache sets"); \
   } \
   return 0; \
}

923

924

925

/* Count the bits set in a 32-bit value.
 *
 * Logarithmic (parallel) bit counting, see
 * http://graphics.stanford.edu/~seander/bithacks.html
 * Each step adds neighbouring groups of bits: the value first holds
 * sixteen 2-bit sums, then eight 4-bit sums, and so on, until the
 * whole word is the population count.
 */
static __inline__ unsigned int countBits(unsigned int bits)
{
   unsigned int n = bits;

   n = (n & 0x55555555u) + ((n >>  1) & 0x55555555u);
   n = (n & 0x33333333u) + ((n >>  2) & 0x33333333u);
   n = (n & 0x0F0F0F0Fu) + ((n >>  4) & 0x0F0F0F0Fu);
   n = (n & 0x00FF00FFu) + ((n >>  8) & 0x00FF00FFu);
   n = (n & 0x0000FFFFu) + ((n >> 16) & 0x0000FFFFu);

   return n;
}

942

943

/* Close the use statistics of L2 slot <idx> and re-arm the slot for
 * the line <memline> that is loaded now.
 *
 * If the old line was used at all, and collection is on, and the old
 * line has a cost array attached, its access cost (1000 / number of
 * uses) and its spatial loss (share of the line never touched,
 * derived from the 32-bit use mask) are added to that cost array.
 * Then the counters are cleared and the current instruction is
 * recorded as the loader of the new line.
 */
static void update_L2_use(int idx, Addr memline)
{
   line_loaded* line = &(L2.loaded[idx]);
   line_use*    stat = &(L2.use[idx]);
   /* unused mask bits, scaled from 32 bits to the line size */
   int lost = ((32 - countBits(stat->mask)) * L2.line_size)>>5;

   CLG_DEBUG(2, " L2.miss [%d]: at %p accessing memline %p\n",
             idx, bb_base + current_ii->instr_offset, memline);

   if (stat->count > 0) {
      CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %p from %p]\n",
                stat->count, lost, stat->mask, line->memline, line->iaddr);
      CLG_DEBUG(2, " collect: %d, use_base %p\n",
                CLG_(current_state).collect, line->use_base);

      if (CLG_(current_state).collect && line->use_base) {
         (line->use_base)[off_L2_AcCost] += 1000 / stat->count;
         (line->use_base)[off_L2_SpLoss] += lost;
      }
   }

   stat->count = 0;
   stat->mask  = 0;

   line->memline = memline;
   line->iaddr   = bb_base + current_ii->instr_offset;

   /* Costs of the new line go to the skipped-cost array while inside
    * a skipped function, otherwise to the current instruction. */
   if (CLG_(current_state).nonskipped)
      line->use_base = CLG_(current_state).nonskipped->skipped;
   else
      line->use_base = cost_base + current_ii->cost_offset;
}

972

973

static

974

CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)

975

{

976

UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);

977

UWord* set = &(L2.tags[setNo << L2.assoc_bits]);

978

UWord tag = memline & L2.tag_mask;

979

980

int i, j, idx;

981

UWord tmp_tag;

982

983

CLG_DEBUG(6,"L2.Acc(Memline %p): Set %d\n", memline, setNo);

984

985

if (tag == (set[0] & L2.tag_mask)) {

986

idx = (setNo << L2.assoc_bits) | (set[0] & ~L2.tag_mask);

987

l1_loaded->dep_use = &(L2.use[idx]);

988

989

CLG_DEBUG(6," Hit0 [idx %d] (line %p from %p): => %08x, count %d\n",

990

idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,

991

L2.use[idx].mask, L2.use[idx].count);

992

return L2_Hit;

993

}

994

for (i = 1; i < L2.assoc; i++) {

995

if (tag == (set[i] & L2.tag_mask)) {

996

tmp_tag = set[i];

997

for (j = i; j > 0; j--) {

998

set[j] = set[j - 1];

999

}

1000

set[0] = tmp_tag;

1001

idx = (setNo << L2.assoc_bits) | (tmp_tag & ~L2.tag_mask);

1002

l1_loaded->dep_use = &(L2.use[idx]);

1003

1004

CLG_DEBUG(6," Hit%d [idx %d] (line %p from %p): => %08x, count %d\n",

1005

i, idx, L2.loaded[idx].memline, L2.loaded[idx].iaddr,

1006

L2.use[idx].mask, L2.use[idx].count);

1007

return L2_Hit;

1008

}

1009

}

1010

1011

/* A miss; install this tag as MRU, shuffle rest down. */

1012

tmp_tag = set[L2.assoc - 1] & ~L2.tag_mask;

1013

for (j = L2.assoc - 1; j > 0; j--) {

1014

set[j] = set[j - 1];

1015

}

1016

set[0] = tag | tmp_tag;

1017

idx = (setNo << L2.assoc_bits) | tmp_tag;

1018

l1_loaded->dep_use = &(L2.use[idx]);

1019

1020

update_L2_use(idx, memline);

1021

1022

return MemAccess;

1023

}

1024

1025

1026

1027

1028

/* UPDATE_USE(L) defines update_<L>_use(cache, idx, mask, memline) for
 * an L1 cache (I1 or D1): close the use statistics of slot <idx> and
 * re-arm the slot for the line <memline>, first touched with <mask>.
 *
 * If the old line was used, collection is on and a cost array is
 * attached, access cost and spatial loss are added to that array and
 * the old line's use is propagated to the L2 line it depends on.
 * Returns L2_Hit if <memline> is 0, otherwise the result of
 * cacheuse_L2_access() for the new line.
 */
#define UPDATE_USE(L) \
\
static CacheModelResult update##_##L##_use(cache_t2* cache, int idx, \
                                           UInt mask, Addr memline) \
{ \
   line_loaded* line = &(cache->loaded[idx]); \
   line_use*    stat = &(cache->use[idx]); \
   /* unused mask bits, scaled from 32 bits to the line size */ \
   int lost = ((32 - countBits(stat->mask)) * cache->line_size)>>5; \
\
   CLG_DEBUG(2, " %s.miss [%d]: at %p accessing memline %p (mask %08x)\n", \
             cache->name, idx, bb_base + current_ii->instr_offset, memline, mask); \
   if (stat->count > 0) { \
      CLG_DEBUG(2, " old: used %d, loss bits %d (%08x) [line %p from %p]\n", \
                stat->count, lost, stat->mask, line->memline, line->iaddr); \
      CLG_DEBUG(2, " collect: %d, use_base %p\n", \
                CLG_(current_state).collect, line->use_base); \
\
      if (CLG_(current_state).collect && line->use_base) { \
         (line->use_base)[off_##L##_AcCost] += 1000 / stat->count; \
         (line->use_base)[off_##L##_SpLoss] += lost; \
\
         /* FIXME (?): L1/L2 line sizes must be equal ! */ \
         line->dep_use->mask |= stat->mask; \
         line->dep_use->count += stat->count; \
      } \
   } \
\
   /* The access causing the load is the first use of the new line. */ \
   stat->count = 1; \
   stat->mask  = mask; \
   line->memline = memline; \
   line->iaddr   = bb_base + current_ii->instr_offset; \
   if (CLG_(current_state).nonskipped) \
      line->use_base = CLG_(current_state).nonskipped->skipped; \
   else \
      line->use_base = cost_base + current_ii->cost_offset; \
\
   if (memline == 0) return L2_Hit; \
   return cacheuse_L2_access(memline, line); \
}

1066

1067

UPDATE_USE(I1);

1068

UPDATE_USE(D1);

1069

1070

CACHEUSE(I1);

1071

CACHEUSE(D1);

1072

1073

1074

static

1075

void cacheuse_finish(void)

1076

{

1077

int i;

1078

InstrInfo ii = { 0,0,0,0,0 };

1079

1080

if (!CLG_(current_state).collect) return;

1081

1082

bb_base = 0;

1083

current_ii = &ii;

1084

cost_base = 0;

1085

1086

/* update usage counters */

1087

if (I1.use)

1088

for (i = 0; i < I1.sets * I1.assoc; i++)

1089

if (I1.loaded[i].use_base)

1090

update_I1_use( &I1, i, 0,0);

1091

1092

if (D1.use)

1093

for (i = 0; i < D1.sets * D1.assoc; i++)

1094

if (D1.loaded[i].use_base)

1095

update_D1_use( &D1, i, 0,0);

1096

1097

if (L2.use)

1098

for (i = 0; i < L2.sets * L2.assoc; i++)

1099

if (L2.loaded[i].use_base)

1100

update_L2_use(i, 0);

1101

}

1102

1103

1104

1105

/*------------------------------------------------------------*/

1106

/*--- Helper functions called by instrumented code ---*/

1107

/*------------------------------------------------------------*/

1108

1109

1110

static __inline__

1111

void inc_costs(CacheModelResult r, ULong* c1, ULong* c2)

1112

{

1113

switch(r) {

1114

case WriteBackMemAccess:

1115

if (clo_simulate_writeback) {

1116

c1[3]++;

1117

c2[3]++;

1118

}

1119

// fall through

1120

1121

case MemAccess:

1122

c1[2]++;

1123

c2[2]++;

1124

// fall through

1125

1126

case L2_Hit:

1127

c1[1]++;

1128

c2[1]++;

1129

// fall through

1130

1131

default:

1132

c1[0]++;

1133

c2[0]++;

1134

}

1135

}

1136

1137

1138

/* Helper called by instrumented code: instruction <ii> without data
 * access. Simulates the instruction fetch and, if collection is on,
 * adds its cost to the instruction's (or the skipped function's)
 * counters and to the global cost.
 */
VG_REGPARM(1)
static void log_1I0D(InstrInfo* ii)
{
   CacheModelResult IrRes;
   ULong* cost_Ir;

   current_ii = ii;
   IrRes = simulator.I1_Read(bb_base + ii->instr_offset, ii->instr_size);

   CLG_DEBUG(6, "log_1I0D: Ir=%p/%u => Ir %d\n",
             bb_base + ii->instr_offset, ii->instr_size, IrRes);

   if (!CLG_(current_state).collect)
      return;

   /* Inside a skipped function the cost goes to its call site. */
   if (CLG_(current_state).nonskipped)
      cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
   else
      cost_Ir = cost_base + ii->cost_offset + off_D0_Ir;

   inc_costs(IrRes, cost_Ir,
             CLG_(current_state).cost + CLG_(sets).off_full_Ir );
}

1161

1162

1163

/* Instruction doing a read access */

1164

1165

/* Helper called by instrumented code: instruction <ii> reading data
 * at address <data>. Simulates the instruction fetch and the data
 * read and, if collection is on, adds both costs to the instruction's
 * (or the skipped function's) counters and to the global cost.
 */
VG_REGPARM(2)
static void log_1I1Dr(InstrInfo* ii, Addr data)
{
   CacheModelResult IrRes, DrRes;
   ULong *base, *cost_Ir, *cost_Dr;

   current_ii = ii;
   IrRes = simulator.I1_Read(bb_base + ii->instr_offset, ii->instr_size);
   DrRes = simulator.D1_Read(data, ii->data_size);

   CLG_DEBUG(6, "log_1I1Dr: Ir=%p/%u, Dr=%p/%u => Ir %d, Dr %d\n",
             bb_base + ii->instr_offset, ii->instr_size,
             data, ii->data_size, IrRes, DrRes);

   if (!CLG_(current_state).collect)
      return;

   /* Inside a skipped function the cost goes to its call site. */
   if (CLG_(current_state).nonskipped) {
      base    = CLG_(current_state).nonskipped->skipped;
      cost_Ir = base + CLG_(sets).off_full_Ir;
      cost_Dr = base + CLG_(sets).off_full_Dr;
   }
   else {
      base    = cost_base + ii->cost_offset;
      cost_Ir = base + off_D1r_Ir;
      cost_Dr = base + off_D1r_Dr;
   }

   inc_costs(IrRes, cost_Ir,
             CLG_(current_state).cost + CLG_(sets).off_full_Ir );
   inc_costs(DrRes, cost_Dr,
             CLG_(current_state).cost + CLG_(sets).off_full_Dr );
}

1196

1197

1198

/* Helper called by instrumented code: a data read at address <data>
 * belonging to instruction <ii>, without simulating an instruction
 * fetch. If collection is on, the read cost is added to the
 * instruction's (or the skipped function's) counters and to the
 * global cost.
 */
VG_REGPARM(2)
static void log_0I1Dr(InstrInfo* ii, Addr data)
{
   CacheModelResult DrRes;
   ULong* cost_Dr;

   current_ii = ii;
   DrRes = simulator.D1_Read(data, ii->data_size);

   CLG_DEBUG(6, "log_0I1Dr: Dr=%p/%u => Dr %d\n",
             data, ii->data_size, DrRes);

   if (!CLG_(current_state).collect)
      return;

   /* Inside a skipped function the cost goes to its call site. */
   if (CLG_(current_state).nonskipped)
      cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dr;
   else
      cost_Dr = cost_base + ii->cost_offset + off_D1r_Dr;

   inc_costs(DrRes, cost_Dr,
             CLG_(current_state).cost + CLG_(sets).off_full_Dr );
}

1223

1224

1225

/* Instruction doing a write access */

1226

1227

/* Helper called by instrumented code: instruction <ii> writing data
 * at address <data>. Simulates the instruction fetch and the data
 * write and, if collection is on, adds both costs to the
 * instruction's (or the skipped function's) counters and to the
 * global cost.
 */
VG_REGPARM(2)
static void log_1I1Dw(InstrInfo* ii, Addr data)
{
   CacheModelResult IrRes, DwRes;

   current_ii = ii;
   IrRes = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
   DwRes = (*simulator.D1_Write)(data, ii->data_size);

   CLG_DEBUG(6, "log_1I1Dw: Ir=%p/%u, Dw=%p/%u => Ir %d, Dw %d\n",
             bb_base + ii->instr_offset, ii->instr_size,
             data, ii->data_size, IrRes, DwRes);

   if (CLG_(current_state).collect) {
      ULong *cost_Ir, *cost_Dw;

      if (CLG_(current_state).nonskipped) {
         /* The skipped-cost array is indexed with the off_full_*
          * offsets, as in all the other log_* helpers and as for the
          * global cost below; off_sim_* would select other counters. */
         cost_Ir = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Ir;
         cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
      }
      else {
         cost_Ir = cost_base + ii->cost_offset + off_D1w_Ir;
         cost_Dw = cost_base + ii->cost_offset + off_D1w_Dw;
      }

      inc_costs(IrRes, cost_Ir,
                CLG_(current_state).cost + CLG_(sets).off_full_Ir );
      inc_costs(DwRes, cost_Dw,
                CLG_(current_state).cost + CLG_(sets).off_full_Dw );
   }
}

1258

1259

/* Handler for a data write at address <data> without an accompanying
 * instruction fetch.  The write is always simulated; its cost is only
 * booked while collection is active. */
VG_REGPARM(2)
static void log_0I1Dw(InstrInfo* ii, Addr data)
{
    CacheModelResult write_res;
    ULong *dw_slot;

    current_ii = ii;
    write_res = (*simulator.D1_Write)(data, ii->data_size);

    CLG_DEBUG(6, "log_0I1Dw: Dw=%p/%u => Dw %d\n",
              data, ii->data_size, write_res);

    if (!CLG_(current_state).collect)
        return;

    if (CLG_(current_state).nonskipped)
        dw_slot = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_full_Dw;
    else
        dw_slot = cost_base + ii->cost_offset + off_D1w_Dw;

    inc_costs(write_res, dw_slot,
              CLG_(current_state).cost + CLG_(sets).off_full_Dw );
}

1284

1285

/* Instruction doing a read and a write access */

1286

1287

/* Handler for an instruction fetch combined with a data read at
 * <data1> and a data write at <data2>.  All three accesses are always
 * passed to the simulator (fetch, then read, then write); costs are
 * only booked while collection is active. */
VG_REGPARM(3)
static void log_1I2D(InstrInfo* ii, Addr data1, Addr data2)
{
    CacheModelResult fetch_res, read_res, write_res;
    ULong *ir_slot, *dr_slot, *dw_slot;

    current_ii = ii;
    fetch_res = (*simulator.I1_Read)(bb_base + ii->instr_offset, ii->instr_size);
    read_res  = (*simulator.D1_Read)(data1, ii->data_size);
    write_res = (*simulator.D1_Write)(data2, ii->data_size);

    CLG_DEBUG(6,
              "log_1I2D: Ir=%p/%u, Dr=%p/%u, Dw=%p/%u => Ir %d, Dr %d, Dw %d\n",
              bb_base + ii->instr_offset, ii->instr_size,
              data1, ii->data_size, data2, ii->data_size,
              fetch_res, read_res, write_res);

    if (!CLG_(current_state).collect)
        return;

    if (CLG_(current_state).nonskipped) {
        /* a nonskipped entry is set: book into its "skipped" cost */
        ir_slot = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Ir;
        dr_slot = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr;
        dw_slot = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw;
    }
    else {
        /* otherwise book into this instruction's slots (D2 layout) */
        ir_slot = cost_base + ii->cost_offset + off_D2_Ir;
        dr_slot = cost_base + ii->cost_offset + off_D2_Dr;
        dw_slot = cost_base + ii->cost_offset + off_D2_Dw;
    }

    inc_costs(fetch_res, ir_slot,
              CLG_(current_state).cost + CLG_(sets).off_full_Ir );
    inc_costs(read_res, dr_slot,
              CLG_(current_state).cost + CLG_(sets).off_full_Dr );
    inc_costs(write_res, dw_slot,
              CLG_(current_state).cost + CLG_(sets).off_full_Dw );
}

1324

1325

/* Handler for a data read at <data1> and a data write at <data2>
 * without an accompanying instruction fetch.  Both accesses are
 * always passed to the simulator (read first, then write); costs are
 * only booked while collection is active. */
VG_REGPARM(3)
static void log_0I2D(InstrInfo* ii, Addr data1, Addr data2)
{
    CacheModelResult DrRes, DwRes;

    current_ii = ii;
    DrRes = (*simulator.D1_Read)(data1, ii->data_size);
    DwRes = (*simulator.D1_Write)(data2, ii->data_size);

    /* The trace label used to read "log_0D2D", which matches no
     * function in this file; it now carries the handler's real name. */
    CLG_DEBUG(6,
              "log_0I2D: Dr=%p/%u, Dw=%p/%u => Dr %d, Dw %d\n",
              data1, ii->data_size, data2, ii->data_size, DrRes, DwRes);

    if (CLG_(current_state).collect) {
        ULong *cost_Dr, *cost_Dw;

        if (CLG_(current_state).nonskipped) {
            /* a nonskipped entry is set: book into its "skipped" cost */
            cost_Dr = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dr;
            cost_Dw = CLG_(current_state).nonskipped->skipped + CLG_(sets).off_sim_Dw;
        }
        else {
            /* otherwise book into this instruction's slots (D2 layout) */
            cost_Dr = cost_base + ii->cost_offset + off_D2_Dr;
            cost_Dw = cost_base + ii->cost_offset + off_D2_Dw;
        }

        inc_costs(DrRes, cost_Dr,
                  CLG_(current_state).cost + CLG_(sets).off_full_Dr );
        inc_costs(DwRes, cost_Dw,
                  CLG_(current_state).cost + CLG_(sets).off_full_Dw );
    }
}

1356

1357

1358

/*------------------------------------------------------------*/

1359

/*--- Cache configuration ---*/

1360

/*------------------------------------------------------------*/

1361

1362

#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })

1363

1364

static cache_t clo_I1_cache = UNDEFINED_CACHE;

1365

static cache_t clo_D1_cache = UNDEFINED_CACHE;

1366

static cache_t clo_L2_cache = UNDEFINED_CACHE;

1367

1368

1369

/* Checks cache config is ok; makes it so if not. */

1370

static

1371

void check_cache(cache_t* cache, Char *name)

1372

{

1373

/* First check they're all powers of two */

1374

if (-1 == VG_(log2)(cache->size)) {

1375

VG_(message)(Vg_UserMsg,

1376

"error: %s size of %dB not a power of two; aborting.",

1377

name, cache->size);

1378

VG_(exit)(1);

1379

}

1380

1381

if (-1 == VG_(log2)(cache->assoc)) {

1382

VG_(message)(Vg_UserMsg,

1383

"error: %s associativity of %d not a power of two; aborting.",

1384

name, cache->assoc);

1385

VG_(exit)(1);

1386

}

1387

1388

if (-1 == VG_(log2)(cache->line_size)) {

1389

VG_(message)(Vg_UserMsg,

1390

"error: %s line size of %dB not a power of two; aborting.",

1391

name, cache->line_size);

1392

VG_(exit)(1);

1393

}

1394

1395

// Then check line size >= 16 -- any smaller and a single instruction could

1396

// straddle three cache lines, which breaks a simulation assertion and is

1397

// stupid anyway.

1398

if (cache->line_size < MIN_LINE_SIZE) {

1399

VG_(message)(Vg_UserMsg,

1400

"error: %s line size of %dB too small; aborting.",

1401

name, cache->line_size);

1402

VG_(exit)(1);

1403

}

1404

1405

/* Then check cache size > line size (causes seg faults if not). */

1406

if (cache->size <= cache->line_size) {

1407

VG_(message)(Vg_UserMsg,

1408

"error: %s cache size of %dB <= line size of %dB; aborting.",

1409

name, cache->size, cache->line_size);

1410

VG_(exit)(1);

1411

}

1412

1413

/* Then check assoc <= (size / line size) (seg faults otherwise). */

1414

if (cache->assoc > (cache->size / cache->line_size)) {

1415

VG_(message)(Vg_UserMsg,

1416

"warning: %s associativity > (size / line size); aborting.", name);

1417

VG_(exit)(1);

1418

}

1419

}

1420

1421

static

1422

void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c)

1423

{

1424

#define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)

1425

1426

Int n_clos = 0;

1427

1428

// Count how many were defined on the command line.

1429

if (DEFINED(clo_I1_cache)) { n_clos++; }

1430

if (DEFINED(clo_D1_cache)) { n_clos++; }

1431

if (DEFINED(clo_L2_cache)) { n_clos++; }

1432

1433

// Set the cache config (using auto-detection, if supported by the

1434

// architecture)

1435

VG_(configure_caches)( I1c, D1c, L2c, (3 == n_clos) );

1436

1437

// Then replace with any defined on the command line.

1438

if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }

1439

if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }

1440

if (DEFINED(clo_L2_cache)) { *L2c = clo_L2_cache; }

1441

1442

// Then check values and fix if not acceptable.

1443

check_cache(I1c, "I1");

1444

check_cache(D1c, "D1");

1445

check_cache(L2c, "L2");

1446

1447

if (VG_(clo_verbosity) > 1) {

1448

VG_(message)(Vg_UserMsg, "Cache configuration used:");

1449

VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines",

1450

I1c->size, I1c->assoc, I1c->line_size);

1451

VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines",

1452

D1c->size, D1c->assoc, D1c->line_size);

1453

VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines",

1454

L2c->size, L2c->assoc, L2c->line_size);

1455

}

1456

#undef CMD_LINE_DEFINED

1457

}

1458

1459

1460

/* Initialize and clear simulator state */

1461

static void cachesim_post_clo_init(void)

1462

{

1463

/* Cache configurations. */

1464

cache_t I1c, D1c, L2c;

1465

1466

/* Initialize access handlers */

1467

if (!CLG_(clo).simulate_cache) {

1468

CLG_(cachesim).log_1I0D = 0;

1469

CLG_(cachesim).log_1I0D_name = "(no function)";

1470

1471

CLG_(cachesim).log_1I1Dr = 0;

1472

CLG_(cachesim).log_1I1Dw = 0;

1473

CLG_(cachesim).log_1I2D = 0;

1474

CLG_(cachesim).log_1I1Dr_name = "(no function)";

1475

CLG_(cachesim).log_1I1Dw_name = "(no function)";

1476

CLG_(cachesim).log_1I2D_name = "(no function)";

1477

1478

CLG_(cachesim).log_0I1Dr = 0;

1479

CLG_(cachesim).log_0I1Dw = 0;

1480

CLG_(cachesim).log_0I2D = 0;

1481

CLG_(cachesim).log_0I1Dr_name = "(no function)";

1482

CLG_(cachesim).log_0I1Dw_name = "(no function)";

1483

CLG_(cachesim).log_0I2D_name = "(no function)";

1484

return;

1485

}

1486

1487

/* Configuration of caches only needed with real cache simulation */

1488

configure_caches(&I1c, &D1c, &L2c);

1489

1490

I1.name = "I1";

1491

D1.name = "D1";

1492

L2.name = "L2";

1493

1494

cachesim_initcache(I1c, &I1);

1495

cachesim_initcache(D1c, &D1);

1496

cachesim_initcache(L2c, &L2);

1497

1498

/* the other cache simulators use the standard helpers

1499

* with dispatching via simulator struct */

1500

1501

CLG_(cachesim).log_1I0D = log_1I0D;

1502

CLG_(cachesim).log_1I0D_name = "log_1I0D";

1503

1504

CLG_(cachesim).log_1I1Dr = log_1I1Dr;

1505

CLG_(cachesim).log_1I1Dw = log_1I1Dw;

1506

CLG_(cachesim).log_1I2D = log_1I2D;

1507

CLG_(cachesim).log_1I1Dr_name = "log_1I1Dr";

1508

CLG_(cachesim).log_1I1Dw_name = "log_1I1Dw";

1509

CLG_(cachesim).log_1I2D_name = "log_1I2D";

1510

1511

CLG_(cachesim).log_0I1Dr = log_0I1Dr;

1512

CLG_(cachesim).log_0I1Dw = log_0I1Dw;

1513

CLG_(cachesim).log_0I2D = log_0I2D;

1514

CLG_(cachesim).log_0I1Dr_name = "log_0I1Dr";

1515

CLG_(cachesim).log_0I1Dw_name = "log_0I1Dw";

1516

CLG_(cachesim).log_0I2D_name = "log_0I2D";

1517

1518

if (clo_collect_cacheuse) {

1519

1520

/* Output warning for not supported option combinations */

1521

if (clo_simulate_hwpref) {

1522

VG_(message)(Vg_DebugMsg,

1523

"warning: prefetch simulation can not be used with cache usage");

1524

clo_simulate_hwpref = False;

1525

}

1526

1527

if (clo_simulate_writeback) {

1528

VG_(message)(Vg_DebugMsg,

1529

"warning: write-back simulation can not be used with cache usage");

1530

clo_simulate_writeback = False;

1531

}

1532

1533

simulator.I1_Read = cacheuse_I1_doRead;

1534

simulator.D1_Read = cacheuse_D1_doRead;

1535

simulator.D1_Write = cacheuse_D1_doRead;

1536

return;

1537

}

1538

1539

if (clo_simulate_hwpref) {

1540

prefetch_clear();

1541

1542

if (clo_simulate_writeback) {

1543

simulator.I1_Read = prefetch_I1_Read;

1544

simulator.D1_Read = prefetch_D1_Read;

1545

simulator.D1_Write = prefetch_D1_Write;

1546

}

1547

else {

1548

simulator.I1_Read = prefetch_I1_ref;

1549

simulator.D1_Read = prefetch_D1_ref;

1550

simulator.D1_Write = prefetch_D1_ref;

1551

}

1552

1553

return;

1554

}

1555

1556

if (clo_simulate_writeback) {

1557

simulator.I1_Read = cachesim_I1_Read;

1558

simulator.D1_Read = cachesim_D1_Read;

1559

simulator.D1_Write = cachesim_D1_Write;

1560

}

1561

else {

1562

simulator.I1_Read = cachesim_I1_ref;

1563

simulator.D1_Read = cachesim_D1_ref;

1564

simulator.D1_Write = cachesim_D1_ref;

1565

}

1566

}

1567

1568

1569

/* Clear simulator state. Has to be initialized before */

1570

static

1571

void cachesim_clear(void)

1572

{

1573

cachesim_clearcache(&I1);

1574

cachesim_clearcache(&D1);

1575

cachesim_clearcache(&L2);

1576

1577

prefetch_clear();

1578

}

1579

1580

1581

/* Write three "desc:" lines, one per simulated cache, into <buf>,
 * using each cache's desc_line.  The text starts with a newline.
 * The caller has to provide a sufficiently large buffer; no bound is
 * checked here. */
static void cachesim_getdesc(Char* buf)
{
    Int used = 0;

    used += VG_(sprintf)(buf + used, "\ndesc: I1 cache: %s\n", I1.desc_line);
    used += VG_(sprintf)(buf + used, "desc: D1 cache: %s\n", D1.desc_line);
    VG_(sprintf)(buf + used, "desc: L2 cache: %s\n", L2.desc_line);
}

1588

1589

static

1590

void cachesim_print_opts(void)

1591

{

1592

VG_(printf)(

1593

"\n cache simulator options:\n"

1594

" --simulate-cache=no|yes Do cache simulation [no]\n"

1595

" --simulate-wb=no|yes Count write-back events [no]\n"

1596

" --simulate-hwpref=no|yes Simulate hardware prefetch [no]\n"

1597

#if CLG_EXPERIMENTAL

1598

" --simulate-sectors=no|yes Simulate sectored behaviour [no]\n"

1599

#endif

1600

" --cacheuse=no|yes Collect cache block use [no]\n"

1601

" --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"

1602

" --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"

1603

" --L2=<size>,<assoc>,<line_size> set L2 cache manually\n"

1604

);

1605

}

1606

1607

/* Parse a cache option of the form "--I1=65536,2,64" into <cache>.
 *
 * orig_opt: the complete option string
 * opt_len:  length of the "--XX=" prefix, i.e. the index at which the
 *           first number starts
 *
 * The three comma separated fields are stored as size, associativity
 * and line size.  Anything other than digits and exactly two commas
 * is reported through VG_(bad_option).
 *
 * The function works on a private copy of the string; that copy is
 * released on the error path as well, so nothing leaks should
 * VG_(bad_option) ever return. */
static void parse_opt ( cache_t* cache, char* orig_opt, int opt_len )
{
    int   i1, i2, i3;      /* start index of each of the three numbers */
    int   i;
    char *opt = VG_(strdup)(orig_opt);

    i = i1 = opt_len;

    /* Find the commas and overwrite them with NULs, giving three
     * independent strings for the number conversion below. */
    while (VG_(isdigit)(opt[i])) i++;
    if (',' != opt[i]) goto bad;
    opt[i++] = '\0';
    i2 = i;

    while (VG_(isdigit)(opt[i])) i++;
    if (',' != opt[i]) goto bad;
    opt[i++] = '\0';
    i3 = i;

    while (VG_(isdigit)(opt[i])) i++;
    if ('\0' != opt[i]) goto bad;      /* trailing garbage */

    cache->size      = (Int)VG_(atoll)(opt + i1);
    cache->assoc     = (Int)VG_(atoll)(opt + i2);
    cache->line_size = (Int)VG_(atoll)(opt + i3);

    VG_(free)(opt);
    return;

  bad:
    /* the message uses the untouched original string, so the scratch
     * copy can go first */
    VG_(free)(opt);
    VG_(bad_option)(orig_opt);
}

1642

1643

/* Check for command line option for cache configuration.

1644

* Return False if unknown and not handled.

1645

1646

* Called from CLG_(process_cmd_line_option)() in clo.c

1647

1648

static Bool cachesim_parse_opt(Char* arg)

1649

{

1650

if (0 == VG_(strcmp)(arg, "--simulate-wb=yes"))

1651

clo_simulate_writeback = True;

1652

else if (0 == VG_(strcmp)(arg, "--simulate-wb=no"))

1653

clo_simulate_writeback = False;

1654

1655

else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=yes"))

1656

clo_simulate_hwpref = True;

1657

else if (0 == VG_(strcmp)(arg, "--simulate-hwpref=no"))

1658

clo_simulate_hwpref = False;

1659

1660

else if (0 == VG_(strcmp)(arg, "--simulate-sectors=yes"))

1661

clo_simulate_sectors = True;

1662

else if (0 == VG_(strcmp)(arg, "--simulate-sectors=no"))

1663

clo_simulate_sectors = False;

1664

1665

else if (0 == VG_(strcmp)(arg, "--cacheuse=yes")) {

1666

clo_collect_cacheuse = True;

1667

/* Use counters only make sense with fine dumping */

1668

CLG_(clo).dump_instr = True;

1669

}

1670

else if (0 == VG_(strcmp)(arg, "--cacheuse=no"))

1671

clo_collect_cacheuse = False;

1672

1673

/* 5 is length of "--I1=" */

1674

else if (0 == VG_(strncmp)(arg, "--I1=", 5))

1675

parse_opt(&clo_I1_cache, arg, 5);

1676

else if (0 == VG_(strncmp)(arg, "--D1=", 5))

1677

parse_opt(&clo_D1_cache, arg, 5);

1678

else if (0 == VG_(strncmp)(arg, "--L2=", 5))

1679

parse_opt(&clo_L2_cache, arg, 5);

1680

else

1681

return False;

1682

1683

return True;

1684

}

1685

1686

/* Adds commas to ULong, right justifying in a field field_width wide, returns

1687

* the string in buf. */

1688

static

1689

Int commify(ULong n, int field_width, char* buf)

1690

{

1691

int len, n_commas, i, j, new_len, space;

1692

1693

VG_(sprintf)(buf, "%llu", n);

1694

len = VG_(strlen)(buf);

1695

n_commas = (len - 1) / 3;

1696

new_len = len + n_commas;

1697

space = field_width - new_len;

1698

1699

/* Allow for printing a number in a field_width smaller than it's size */

1700

if (space < 0) space = 0;

1701

1702

/* Make j = -1 because we copy the '\0' before doing the numbers in groups

1703

* of three. */

1704

for (j = -1, i = len ; i >= 0; i--) {

1705

buf[i + n_commas + space] = buf[i];

1706

1707

if ((i>0) && (3 == ++j)) {

1708

j = 0;

1709

n_commas--;

1710

buf[i + n_commas + space] = ',';

1711

}

1712

}

1713

/* Right justify in field. */

1714

for (i = 0; i < space; i++) buf[i] = ' ';

1715

return new_len;

1716

}

1717

1718

static

1719

void percentify(Int n, Int ex, Int field_width, char buf[])

1720

{

1721

int i, len, space;

1722

1723

VG_(sprintf)(buf, "%d.%d%%", n / ex, n % ex);

1724

len = VG_(strlen)(buf);

1725

space = field_width - len;

1726

if (space < 0) space = 0; /* Allow for v. small field_width */

1727

i = len;

1728

1729

/* Right justify in field */

1730

for ( ; i >= 0; i--) buf[i + space] = buf[i];

1731

for (i = 0; i < space; i++) buf[i] = ' ';

1732

}

1733

1734

static

1735

void cachesim_printstat(void)

1736

{

1737

FullCost total = CLG_(total_cost), D_total = 0;

1738

ULong L2_total_m, L2_total_mr, L2_total_mw,

1739

L2_total, L2_total_r, L2_total_w;

1740

char buf1[RESULTS_BUF_LEN],

1741

buf2[RESULTS_BUF_LEN],

1742

buf3[RESULTS_BUF_LEN];

1743

Int l1, l2, l3;

1744

Int p;

1745

1746

if ((VG_(clo_verbosity) >1) && clo_simulate_hwpref) {

1747

VG_(message)(Vg_DebugMsg, "Prefetch Up: %llu",

1748

prefetch_up);

1749

VG_(message)(Vg_DebugMsg, "Prefetch Down: %llu",

1750

prefetch_down);

1751

VG_(message)(Vg_DebugMsg, "");

1752

}

1753

1754

/* I cache results. Use the I_refs value to determine the first column

1755

* width. */

1756

l1 = commify(total[CLG_(sets).off_full_Ir], 0, buf1);

1757

VG_(message)(Vg_UserMsg, "I refs: %s", buf1);

1758

1759

if (!CLG_(clo).simulate_cache) return;

1760

1761

commify(total[CLG_(sets).off_full_Ir +1], l1, buf1);

1762

VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1);

1763

1764

commify(total[CLG_(sets).off_full_Ir +2], l1, buf1);

1765

VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1);

1766

1767

p = 100;

1768

1769

if (0 == total[CLG_(sets).off_full_Ir])

1770

total[CLG_(sets).off_full_Ir] = 1;

1771

1772

percentify(total[CLG_(sets).off_full_Ir+1] * 100 * p /

1773

total[CLG_(sets).off_full_Ir], p, l1+1, buf1);

1774

VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1);

1775

1776

percentify(total[CLG_(sets).off_full_Ir+2] * 100 * p /

1777

total[CLG_(sets).off_full_Ir], p, l1+1, buf1);

1778

VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1);

1779

VG_(message)(Vg_UserMsg, "");

1780

1781

/* D cache results.

1782

Use the D_refs.rd and D_refs.wr values to determine the

1783

* width of columns 2 & 3. */

1784

1785

D_total = CLG_(get_eventset_cost)( CLG_(sets).full );

1786

CLG_(init_cost)( CLG_(sets).full, D_total);

1787

CLG_(copy_cost)( CLG_(sets).Dr, D_total, total + CLG_(sets).off_full_Dr );

1788

CLG_(add_cost) ( CLG_(sets).Dw, D_total, total + CLG_(sets).off_full_Dw );

1789

1790

commify( D_total[0], l1, buf1);

1791

l2 = commify(total[CLG_(sets).off_full_Dr], 0, buf2);

1792

l3 = commify(total[CLG_(sets).off_full_Dw], 0, buf3);

1793

VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)",

1794

buf1, buf2, buf3);

1795

1796

commify( D_total[1], l1, buf1);

1797

commify(total[CLG_(sets).off_full_Dr+1], l2, buf2);

1798

commify(total[CLG_(sets).off_full_Dw+1], l3, buf3);

1799

VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)",

1800

buf1, buf2, buf3);

1801

1802

commify( D_total[2], l1, buf1);

1803

commify(total[CLG_(sets).off_full_Dr+2], l2, buf2);

1804

commify(total[CLG_(sets).off_full_Dw+2], l3, buf3);

1805

VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)",

1806

buf1, buf2, buf3);

1807

1808

p = 10;

1809

1810

if (0 == D_total[0]) D_total[0] = 1;

1811

if (0 == total[CLG_(sets).off_full_Dr]) total[CLG_(sets).off_full_Dr] = 1;

1812

if (0 == total[CLG_(sets).off_full_Dw]) total[CLG_(sets).off_full_Dw] = 1;

1813

1814

percentify( D_total[1] * 100 * p / D_total[0], p, l1+1, buf1);

1815

percentify(total[CLG_(sets).off_full_Dr+1] * 100 * p /

1816

total[CLG_(sets).off_full_Dr], p, l2+1, buf2);

1817

percentify(total[CLG_(sets).off_full_Dw+1] * 100 * p /

1818

total[CLG_(sets).off_full_Dw], p, l3+1, buf3);

1819

VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3);

1820

1821

percentify( D_total[2] * 100 * p / D_total[0], p, l1+1, buf1);

1822

percentify(total[CLG_(sets).off_full_Dr+2] * 100 * p /

1823

total[CLG_(sets).off_full_Dr], p, l2+1, buf2);

1824

percentify(total[CLG_(sets).off_full_Dw+2] * 100 * p /

1825

total[CLG_(sets).off_full_Dw], p, l3+1, buf3);

1826

VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3);

1827

VG_(message)(Vg_UserMsg, "");

1828

1829

1830

1831

/* L2 overall results */

1832

1833

L2_total =

1834

total[CLG_(sets).off_full_Dr +1] +

1835

total[CLG_(sets).off_full_Dw +1] +

1836

total[CLG_(sets).off_full_Ir +1];

1837

L2_total_r =

1838

total[CLG_(sets).off_full_Dr +1] +

1839

total[CLG_(sets).off_full_Ir +1];

1840

L2_total_w = total[CLG_(sets).off_full_Dw +1];

1841

commify(L2_total, l1, buf1);

1842

commify(L2_total_r, l2, buf2);

1843

commify(L2_total_w, l3, buf3);

1844

VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)",

1845

buf1, buf2, buf3);

1846

1847

L2_total_m =

1848

total[CLG_(sets).off_full_Dr +2] +

1849

total[CLG_(sets).off_full_Dw +2] +

1850

total[CLG_(sets).off_full_Ir +2];

1851

L2_total_mr =

1852

total[CLG_(sets).off_full_Dr +2] +

1853

total[CLG_(sets).off_full_Ir +2];

1854

L2_total_mw = total[CLG_(sets).off_full_Dw +2];

1855

commify(L2_total_m, l1, buf1);

1856

commify(L2_total_mr, l2, buf2);

1857

commify(L2_total_mw, l3, buf3);

1858

VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)",

1859

buf1, buf2, buf3);

1860

1861

percentify(L2_total_m * 100 * p /

1862

(total[CLG_(sets).off_full_Ir] + D_total[0]), p, l1+1, buf1);

1863

percentify(L2_total_mr * 100 * p /

1864

(total[CLG_(sets).off_full_Ir] + total[CLG_(sets).off_full_Dr]),

1865

p, l2+1, buf2);

1866

percentify(L2_total_mw * 100 * p /

1867

total[CLG_(sets).off_full_Dw], p, l3+1, buf3);

1868

VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )",

1869

buf1, buf2,buf3);

1870

}

1871

1872

1873

/*------------------------------------------------------------*/

1874

/*--- Setup for Event set. ---*/

1875

/*------------------------------------------------------------*/

1876

1877

struct event_sets CLG_(sets);

1878

1879

void CLG_(init_eventsets)(Int max_user)

1880

{

1881

EventType * e1, *e2, *e3, *e4;

1882

EventSet *Ir, *Dr, *Dw;

1883

EventSet *D0, *D1r, *D1w, *D2;

1884

EventSet *sim, *full;

1885

EventSet *use;

1886

int sizeOfUseIr;

1887

1888

use = CLG_(get_eventset)("Use", 4);

1889

if (clo_collect_cacheuse) {

1890

/* if TUse is 0, there was never a load, and no loss, too */

1891

e1 = CLG_(register_eventtype)("AcCost1");

1892

CLG_(add_eventtype)(use, e1);

1893

e1 = CLG_(register_eventtype)("SpLoss1");

1894

CLG_(add_eventtype)(use, e1);

1895

e1 = CLG_(register_eventtype)("AcCost2");

1896

CLG_(add_eventtype)(use, e1);

1897

e1 = CLG_(register_eventtype)("SpLoss2");

1898

CLG_(add_eventtype)(use, e1);

1899

}

1900

1901

Ir = CLG_(get_eventset)("Ir", 4);

1902

Dr = CLG_(get_eventset)("Dr", 4);

1903

Dw = CLG_(get_eventset)("Dw", 4);

1904

if (CLG_(clo).simulate_cache) {

1905

e1 = CLG_(register_eventtype)("Ir");

1906

e2 = CLG_(register_eventtype)("I1mr");

1907

e3 = CLG_(register_eventtype)("I2mr");

1908

if (clo_simulate_writeback) {

1909

e4 = CLG_(register_eventtype)("I2dmr");

1910

CLG_(add_dep_event4)(Ir, e1,e2,e3,e4);

1911

}

1912

else

1913

CLG_(add_dep_event3)(Ir, e1,e2,e3);

1914

1915

e1 = CLG_(register_eventtype)("Dr");

1916

e2 = CLG_(register_eventtype)("D1mr");

1917

e3 = CLG_(register_eventtype)("D2mr");

1918

if (clo_simulate_writeback) {

1919

e4 = CLG_(register_eventtype)("D2dmr");

1920

CLG_(add_dep_event4)(Dr, e1,e2,e3,e4);

1921

}

1922

else

1923

CLG_(add_dep_event3)(Dr, e1,e2,e3);

1924

1925

e1 = CLG_(register_eventtype)("Dw");

1926

e2 = CLG_(register_eventtype)("D1mw");

1927

e3 = CLG_(register_eventtype)("D2mw");

1928

if (clo_simulate_writeback) {

1929

e4 = CLG_(register_eventtype)("D2dmw");

1930

CLG_(add_dep_event4)(Dw, e1,e2,e3,e4);

1931

}

1932

else

1933

CLG_(add_dep_event3)(Dw, e1,e2,e3);

1934

1935

}

1936

else {

1937

e1 = CLG_(register_eventtype)("Ir");

1938

CLG_(add_eventtype)(Ir, e1);

1939

}

1940

1941

sizeOfUseIr = use->size + Ir->size;

1942

D0 = CLG_(get_eventset)("D0", sizeOfUseIr);

1943

CLG_(add_eventset)(D0, use);

1944

off_D0_Ir = CLG_(add_eventset)(D0, Ir);

1945

1946

D1r = CLG_(get_eventset)("D1r", sizeOfUseIr + Dr->size);

1947

CLG_(add_eventset)(D1r, use);

1948

off_D1r_Ir = CLG_(add_eventset)(D1r, Ir);

1949

off_D1r_Dr = CLG_(add_eventset)(D1r, Dr);

1950

1951

D1w = CLG_(get_eventset)("D1w", sizeOfUseIr + Dw->size);

1952

CLG_(add_eventset)(D1w, use);

1953

off_D1w_Ir = CLG_(add_eventset)(D1w, Ir);

1954

off_D1w_Dw = CLG_(add_eventset)(D1w, Dw);

1955

1956

D2 = CLG_(get_eventset)("D2", sizeOfUseIr + Dr->size + Dw->size);

1957

CLG_(add_eventset)(D2, use);

1958

off_D2_Ir = CLG_(add_eventset)(D2, Ir);

1959

off_D2_Dr = CLG_(add_eventset)(D2, Dr);

1960

off_D2_Dw = CLG_(add_eventset)(D2, Dw);

1961

1962

sim = CLG_(get_eventset)("sim", sizeOfUseIr + Dr->size + Dw->size);

1963

CLG_(add_eventset)(sim, use);

1964

CLG_(sets).off_sim_Ir = CLG_(add_eventset)(sim, Ir);

1965

CLG_(sets).off_sim_Dr = CLG_(add_eventset)(sim, Dr);

1966

CLG_(sets).off_sim_Dw = CLG_(add_eventset)(sim, Dw);

1967

1968

if (CLG_(clo).collect_alloc) max_user += 2;

1969

if (CLG_(clo).collect_systime) max_user += 2;

1970

1971

full = CLG_(get_eventset)("full", sim->size + max_user);

1972

CLG_(add_eventset)(full, sim);

1973

CLG_(sets).off_full_Ir = CLG_(sets).off_sim_Ir;

1974

CLG_(sets).off_full_Dr = CLG_(sets).off_sim_Dr;

1975

CLG_(sets).off_full_Dw = CLG_(sets).off_sim_Dw;

1976

1977

CLG_(sets).use = use;

1978

CLG_(sets).Ir = Ir;

1979

CLG_(sets).Dr = Dr;

1980

CLG_(sets).Dw = Dw;

1981

1982

CLG_(sets).D0 = D0;

1983

CLG_(sets).D1r = D1r;

1984

CLG_(sets).D1w = D1w;

1985

CLG_(sets).D2 = D2;

1986

1987

CLG_(sets).sim = sim;

1988

CLG_(sets).full = full;

1989

1990

if (CLG_(clo).collect_alloc) {

1991

e1 = CLG_(register_eventtype)("allocCount");

1992

e2 = CLG_(register_eventtype)("allocSize");

1993

CLG_(sets).off_full_user = CLG_(add_dep_event2)(full, e1,e2);

1994

}

1995

1996

if (CLG_(clo).collect_systime) {

1997

e1 = CLG_(register_eventtype)("sysCount");

1998

e2 = CLG_(register_eventtype)("sysTime");

1999

CLG_(sets).off_full_systime = CLG_(add_dep_event2)(full, e1,e2);

2000

}

2001

2002

CLG_DEBUGIF(1) {

2003

CLG_DEBUG(1, "EventSets:\n");

2004

CLG_(print_eventset)(-2, use);

2005

CLG_(print_eventset)(-2, Ir);

2006

CLG_(print_eventset)(-2, Dr);

2007

CLG_(print_eventset)(-2, Dw);

2008

CLG_(print_eventset)(-2, sim);

2009

CLG_(print_eventset)(-2, full);

2010

}

2011

2012

/* Not-existing events are silently ignored */

2013

CLG_(dumpmap) = CLG_(get_eventmapping)(full);

2014

CLG_(append_event)(CLG_(dumpmap), "Ir");

2015

CLG_(append_event)(CLG_(dumpmap), "Dr");

2016

CLG_(append_event)(CLG_(dumpmap), "Dw");

2017

CLG_(append_event)(CLG_(dumpmap), "I1mr");

2018

CLG_(append_event)(CLG_(dumpmap), "D1mr");

2019

CLG_(append_event)(CLG_(dumpmap), "D1mw");

2020

CLG_(append_event)(CLG_(dumpmap), "I2mr");

2021

CLG_(append_event)(CLG_(dumpmap), "D2mr");

2022

CLG_(append_event)(CLG_(dumpmap), "D2mw");

2023

CLG_(append_event)(CLG_(dumpmap), "I2dmr");

2024

CLG_(append_event)(CLG_(dumpmap), "D2dmr");

2025

CLG_(append_event)(CLG_(dumpmap), "D2dmw");

2026

CLG_(append_event)(CLG_(dumpmap), "AcCost1");

2027

CLG_(append_event)(CLG_(dumpmap), "SpLoss1");

2028

CLG_(append_event)(CLG_(dumpmap), "AcCost2");

2029

CLG_(append_event)(CLG_(dumpmap), "SpLoss2");

2030

CLG_(append_event)(CLG_(dumpmap), "allocCount");

2031

CLG_(append_event)(CLG_(dumpmap), "allocSize");

2032

CLG_(append_event)(CLG_(dumpmap), "sysCount");

2033

CLG_(append_event)(CLG_(dumpmap), "sysTime");

2034

2035

}

2036

2037

2038

2039

static

2040

void add_and_zero_Dx(EventSet* es, SimCost dst, ULong* cost)

2041

{

2042

/* if eventset use is defined, it is always first (hardcoded!) */

2043

CLG_(add_and_zero_cost)( CLG_(sets).use, dst, cost);

2044

2045

/* FIXME: This is hardcoded... */

2046

if (es == CLG_(sets).D0) {

2047

CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,

2048

cost + off_D0_Ir);

2049

}

2050

else if (es == CLG_(sets).D1r) {

2051

CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,

2052

cost + off_D1r_Ir);

2053

CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,

2054

cost + off_D1r_Dr);

2055

}

2056

else if (es == CLG_(sets).D1w) {

2057

CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,

2058

cost + off_D1w_Ir);

2059

CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,

2060

cost + off_D1w_Dw);

2061

}

2062

else {

2063

CLG_ASSERT(es == CLG_(sets).D2);

2064

CLG_(add_and_zero_cost)( CLG_(sets).Ir, dst + CLG_(sets).off_sim_Ir,

2065

cost + off_D2_Ir);

2066

CLG_(add_and_zero_cost)( CLG_(sets).Dr, dst + CLG_(sets).off_sim_Dr,

2067

cost + off_D2_Dr);

2068

CLG_(add_and_zero_cost)( CLG_(sets).Dw, dst + CLG_(sets).off_sim_Dw,

2069

cost + off_D2_Dw);

2070

}

2071

}

2072

2073

/* this is called at dump time for every instruction executed */

2074

static void cachesim_add_icost(SimCost cost, BBCC* bbcc,

2075

InstrInfo* ii, ULong exe_count)

2076

{

2077

if (!CLG_(clo).simulate_cache)

2078

cost[CLG_(sets).off_sim_Ir] += exe_count;

2079

else {

2080

2081

#if 0

2082

/* There is always a trivial case where exe_count and Ir can be

2083

* slightly different because ecounter is updated when executing

2084

* the next BB. E.g. for last BB executed, or when toggling collection

2085

2086

/* FIXME: Hardcoded that each eventset has Ir as first */

2087

if ((bbcc->cost + ii->cost_offset)[0] != exe_count) {

2088

VG_(printf)("==> Ir %llu, exe %llu\n",

2089

(bbcc->cost + ii->cost_offset)[0], exe_count);

2090

CLG_(print_bbcc_cost)(-2, bbcc);

2091

//CLG_ASSERT((bbcc->cost + ii->cost_offset)[0] == exe_count);

2092

}

2093

#endif

2094

2095

add_and_zero_Dx(ii->eventset, cost,

2096

bbcc->cost + ii->cost_offset);

2097

}

2098

}

2099

2100

static

2101

void cachesim_after_bbsetup(void)

2102

{

2103

BBCC* bbcc = CLG_(current_state).bbcc;

2104

2105

if (CLG_(clo).simulate_cache) {

2106

BB* bb = bbcc->bb;

2107

2108

/* only needed if log_* functions are called */

2109

bb_base = bb->obj->offset + bb->offset;

2110

cost_base = bbcc->cost;

2111

}

2112

}

2113

2114

static

2115

void cachesim_finish(void)

2116

{

2117

if (clo_collect_cacheuse)

2118

cacheuse_finish();

2119

}

2120

2121

/*------------------------------------------------------------*/

2122

/*--- The simulator defined in this file ---*/

2123

/*------------------------------------------------------------*/

2124

2125

struct cachesim_if CLG_(cachesim) = {

2126

.print_opts = cachesim_print_opts,

2127

.parse_opt = cachesim_parse_opt,

2128

.post_clo_init = cachesim_post_clo_init,

2129

.clear = cachesim_clear,

2130

.getdesc = cachesim_getdesc,

2131

.printstat = cachesim_printstat,

2132

.add_icost = cachesim_add_icost,

2133

.after_bbsetup = cachesim_after_bbsetup,

2134

.finish = cachesim_finish,

2135

2136

/* these will be set by cachesim_post_clo_init */

2137

.log_1I0D = 0,

2138

2139

.log_1I1Dr = 0,

2140

.log_1I1Dw = 0,

2141

.log_1I2D = 0,

2142

2143

.log_0I1Dr = 0,

2144

.log_0I1Dw = 0,

2145

.log_0I2D = 0,

2146

2147

.log_1I0D_name = "(no function)",

2148

2149

.log_1I1Dr_name = "(no function)",

2150

.log_1I1Dw_name = "(no function)",

2151

.log_1I2D_name = "(no function)",

2152

2153

.log_0I1Dr_name = "(no function)",

2154

.log_0I1Dw_name = "(no function)",

2155

.log_0I2D_name = "(no function)"

2156

};

2157

2158

2159

/*--------------------------------------------------------------------*/

2160

/*--- end sim.c ---*/

2161

/*--------------------------------------------------------------------*/

2162

Older »