如标题所述,为什么OpenJDK JVM在Windows x86上不发出预取指令?参见OpenJDK Mercurial @ http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c49dcaf78a65/src/os_cpu/windows_x86/vm/prefetch_windows_x86.inline.hpp
inline void Prefetch::read (void *loc, intx interval) {}
inline void Prefetch::write(void *loc, intx interval) {}
源码中没有任何注释,除了源代码之外,我也没有找到其他资料。我之所以这样问,是因为 Linux x86 版本确实会发出预取指令,请参见 http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c49dcaf78a65/src/os_cpu/linux_x86/vm/prefetch_linux_x86.inline.hpp
// Linux x86 variant of Prefetch::read: hints a prefetch of the address loc + interval.
// When AMD64 is defined, a single prefetcht0 is issued using (base, index, scale=1)
// addressing with loc as the base and interval as the index.
// When AMD64 is not defined, the body is empty and the call does nothing.
inline void Prefetch::read (void *loc, intx interval) {
#ifdef AMD64
__asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval));
#endif // AMD64
}
// Linux x86 variant of Prefetch::write: hints a prefetch of the address loc + interval.
// When AMD64 is defined it issues the same prefetcht0 instruction as Prefetch::read;
// when AMD64 is not defined, the body is empty and the call does nothing.
inline void Prefetch::write(void *loc, intx interval) {
#ifdef AMD64
// Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
// The rejected prefetchw form is kept below, commented out, for reference:
// __asm__ ("prefetchw (%0,%1,1)" : : "r" (loc), "r" (interval));
__asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval));
#endif // AMD64
}
最佳答案
您引用的所有文件中都是 asm 代码片段(内联汇编),某些 C/C++ 软件会在自身的代码中使用这类片段(正如 JVM 专家 apangin 所指出的,主要用于 GC 代码)。各平台之间确实存在差异:x86_64 HotSpot 的 Linux、Solaris 和 BSD 版本都实现了预取,而 Windows 版本则将其禁用/未实现。这多少有些奇怪,原因也难以解释;它还可能使 JVM 在 Windows 上稍慢一些(慢几个百分点;在没有硬件预取的平台上会更多),不过这也无助于 Sun/Oracle 卖出更多 Solaris 或 Solaris 付费支持合同。Ross 还猜测,MS C++ 编译器可能不支持这种内联 asm 语法,但应当支持 _mm_prefetch
(谁来提交一个 JDK bug,把它加到该文件中?)。
HotSpot JVM 带有 JIT 编译器,JIT 编译出的代码由 JIT 以字节的形式发出(生成)(虽然 JIT 也可以把自身函数中的代码复制到生成的代码里,或者发出对支持函数的调用,但在 HotSpot 中预取指令是以字节形式直接发出的)。我们如何查明它是怎样发出的?一个简单的在线办法是,找一份可在线搜索的 jdk8u 副本(最好是 metager 之类的交叉引用站点),例如 GitHub 上的 https://github.com/JetBrains/jdk8u_hotspot ,然后搜索 prefetch、prefetch emit、prefetchr 或 lir_prefetchr。有如下一些相关结果:
JVM 的 c1 编译器 / LIR 实际发出的字节,位于 jdk8u_hotspot/src/cpu/x86/vm/assembler_x86.cpp 中:
// Emits the leading bytes shared by all prefetch encodings in this file:
// first whatever prefix(src) produces for the memory operand (prefix() is defined
// elsewhere; presumably any prefix byte the address needs), then the 0x0F byte.
void Assembler::prefetch_prefix(Address src) {
prefix(src);
emit_int8(0x0F);
}
// Emits the machine code for a prefetchnta of the memory operand src.
// Byte order: prefetch_prefix(src), then 0x18, then the operand encoding of src
// with rax passed in the register slot (digit 0, per the trailing comment).
void Assembler::prefetchnta(Address src) {
// SSE support is asserted inside NOT_LP64(...); presumably only on non-LP64 (32-bit) builds.
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
emit_operand(rax, src); // 0, src
}
// Emits the machine code for a prefetchr of the memory operand src.
// Unlike the 0x18-based variants, this asserts 3DNow! prefetch support unconditionally.
// Byte order: prefetch_prefix(src), then 0x0D, then the operand encoding of src
// with rax passed in the register slot (digit 0, per the trailing comment).
void Assembler::prefetchr(Address src) {
assert(VM_Version::supports_3dnow_prefetch(), "must support");
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x0D);
emit_operand(rax, src); // 0, src
}
// Emits the machine code for a prefetcht0 of the memory operand src.
// Byte order: prefetch_prefix(src), then 0x18, then the operand encoding of src
// with rcx passed in the register slot (digit 1, per the trailing comment).
void Assembler::prefetcht0(Address src) {
// SSE support is asserted inside NOT_LP64(...); presumably only on non-LP64 (32-bit) builds.
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
emit_operand(rcx, src); // 1, src
}
// Emits the machine code for a prefetcht1 of the memory operand src.
// Byte order: prefetch_prefix(src), then 0x18, then the operand encoding of src
// with rdx passed in the register slot (digit 2, per the trailing comment).
void Assembler::prefetcht1(Address src) {
// SSE support is asserted inside NOT_LP64(...); presumably only on non-LP64 (32-bit) builds.
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
emit_operand(rdx, src); // 2, src
}
// Emits the machine code for a prefetcht2 of the memory operand src.
// Byte order: prefetch_prefix(src), then 0x18, then the operand encoding of src
// with rbx passed in the register slot (digit 3, per the trailing comment).
void Assembler::prefetcht2(Address src) {
// SSE support is asserted inside NOT_LP64(...); presumably only on non-LP64 (32-bit) builds.
NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x18);
emit_operand(rbx, src); // 3, src
}
// Emits the machine code for a prefetchw of the memory operand src.
// Like prefetchr, this asserts 3DNow! prefetch support unconditionally.
// Byte order: prefetch_prefix(src), then 0x0D, then the operand encoding of src
// with rcx passed in the register slot (digit 1, per the trailing comment).
void Assembler::prefetchw(Address src) {
assert(VM_Version::supports_3dnow_prefetch(), "must support");
InstructionMark im(this);
prefetch_prefix(src);
emit_int8(0x0D);
emit_operand(rcx, src); // 1, src
}
在c1 LIR中的用法:
src/share/vm/c1/c1_LIRAssembler.cpp
void LIR_Assembler::emit_op1(LIR_Op1* op) {
switch (op->code()) {
...
case lir_prefetchr:
prefetchr(op->in_opr());
break;
case lir_prefetchw:
prefetchw(op->in_opr());
break;
现在我们知道了操作码 lir_prefetchr,可以直接搜索它,或者在 OpenGrok xref 中搜索它和 lir_prefetchw,从而找到唯一的使用示例,位于 src/share/vm/c1/c1_LIR.cpp 中:
void LIR_List::prefetch(LIR_Address* addr, bool is_store) {
append(new LIR_Op1(
is_store ? lir_prefetchw : lir_prefetchr,
LIR_OprFact::address(addr)));
}
预取指令在其他地方也有定义(用于 C2,正如 apangin 所指出的),即 src/cpu/x86/vm/x86_64.ad:
// Prefetch instructions. ...
// Matches a PrefetchRead of a memory operand when ReadPrefetchInstr == 3
// and encodes it through the assembler's prefetchr.
instruct prefetchr( memory mem ) %{
predicate(ReadPrefetchInstr==3);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
ins_encode %{
__ prefetchr($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchRead of a memory operand when ReadPrefetchInstr == 0
// and encodes it through the assembler's prefetchnta.
instruct prefetchrNTA( memory mem ) %{
predicate(ReadPrefetchInstr==0);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchRead of a memory operand when ReadPrefetchInstr == 1
// and encodes it through the assembler's prefetcht0.
instruct prefetchrT0( memory mem ) %{
predicate(ReadPrefetchInstr==1);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchRead of a memory operand when ReadPrefetchInstr == 2
// and encodes it through the assembler's prefetcht2.
instruct prefetchrT2( memory mem ) %{
predicate(ReadPrefetchInstr==2);
match(PrefetchRead mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchWrite of a memory operand and encodes it through the
// assembler's prefetchnta. Unlike the read variants, this definition carries
// no predicate, so no flag selects among alternatives here.
instruct prefetchwNTA( memory mem ) %{
match(PrefetchWrite mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Prefetch instructions for allocation.
// Matches a PrefetchAllocation of a memory operand when AllocatePrefetchInstr == 3
// and encodes it through the assembler's prefetchw.
instruct prefetchAlloc( memory mem ) %{
predicate(AllocatePrefetchInstr==3);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
ins_encode %{
__ prefetchw($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchAllocation of a memory operand when AllocatePrefetchInstr == 0
// and encodes it through the assembler's prefetchnta.
instruct prefetchAllocNTA( memory mem ) %{
predicate(AllocatePrefetchInstr==0);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
ins_encode %{
__ prefetchnta($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchAllocation of a memory operand when AllocatePrefetchInstr == 1
// and encodes it through the assembler's prefetcht0.
instruct prefetchAllocT0( memory mem ) %{
predicate(AllocatePrefetchInstr==1);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
ins_encode %{
__ prefetcht0($mem$$Address);
%}
ins_pipe(ialu_mem);
%}
// Matches a PrefetchAllocation of a memory operand when AllocatePrefetchInstr == 2
// and encodes it through the assembler's prefetcht2.
instruct prefetchAllocT2( memory mem ) %{
predicate(AllocatePrefetchInstr==2);
match(PrefetchAllocation mem);
ins_cost(125);
format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
ins_encode %{
__ prefetcht2($mem$$Address);
%}
ins_pipe(ialu_mem);
%}