From 5bff05b03df111d77654d6eefe52872dbeae01d6 Mon Sep 17 00:00:00 2001 From: goetz Date: Thu, 12 Sep 2013 13:51:13 -0700 Subject: [PATCH] 8024344: PPC64 (part 112): C argument in register AND stack slot. Summary: On PPC, the first 13 floating point arguments to C calls are passed in floating point registers. Also, all but the first 8 arguments are passed on the stack. So there can be floating point arguments that are passed on the stack and in a register. We duplicate the regs datastructure in c_calling_convention() to represent this. Reviewed-by: kvn, cjplummer --- src/cpu/sparc/vm/sharedRuntime_sparc.cpp | 6 ++++-- src/cpu/sparc/vm/sparc.ad | 4 ++-- src/cpu/x86/vm/sharedRuntime_x86_32.cpp | 6 ++++-- src/cpu/x86/vm/sharedRuntime_x86_64.cpp | 6 ++++-- src/cpu/x86/vm/x86_32.ad | 2 +- src/cpu/x86/vm/x86_64.ad | 2 +- src/cpu/zero/vm/globals_zero.hpp | 1 + src/cpu/zero/vm/sharedRuntime_zero.cpp | 1 + src/cpu/zero/vm/shark_globals_zero.hpp | 1 - src/share/vm/c1/c1_FrameMap.cpp | 2 +- src/share/vm/runtime/sharedRuntime.hpp | 10 +++++++++- 11 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp index 3149dbd76..bfc6ba4c2 100644 --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp @@ -1118,7 +1118,9 @@ static VMReg int_stk_helper( int i ) { int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, + VMRegPair *regs2, int total_args_passed) { + assert(regs2 == NULL, "not needed on sparc"); // Return the number of VMReg stack_slots needed for the args. // This value does not include an abi space (like register window @@ -2096,7 +2098,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // the 1st six register arguments). It's weird see int_stk_helper. // int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); if (is_critical_native) { // Critical natives may have to call out so they need a save area @@ -2843,7 +2845,7 @@ nmethod *SharedRuntime::generate_dtrace_nmethod( // the 1st six register arguments). It's weird see int_stk_helper. // int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Calculate the total number of stack slots we will need. diff --git a/src/cpu/sparc/vm/sparc.ad b/src/cpu/sparc/vm/sparc.ad index a72bffdad..73f03533c 100644 --- a/src/cpu/sparc/vm/sparc.ad +++ b/src/cpu/sparc/vm/sparc.ad @@ -3218,7 +3218,7 @@ frame %{ // C. c_calling_convention %{ // This is obviously always outgoing - (void) SharedRuntime::c_calling_convention(sig_bt, regs, length); + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); %} // Location of native (C/C++) and interpreter return values. This is specified to @@ -9118,7 +9118,7 @@ instruct branch_short(label labl) %{ size(4); ins_cost(BRANCH_COST); format %{ "BA $labl\t! short branch" %} - ins_encode %{ + ins_encode %{ Label* L = $labl$$label; assert(__ use_cbcond(*L), "back to back cbcond"); __ ba_short(*L); diff --git a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp index 0fce7952a..071171e0c 100644 --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp @@ -977,7 +977,9 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, + VMRegPair *regs2, int total_args_passed) { + assert(regs2 == NULL, "not needed on x86"); // We return the amount of VMRegImpl stack slots we need to reserve for all // the arguments NOT counting out_preserve_stack_slots. @@ -1624,7 +1626,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now figure out where the args must be stored and how much stack space // they require. int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Compute framesize for the wrapper. We need to handlize all oops in // registers a max of 2 on x86. @@ -2495,7 +2497,7 @@ nmethod *SharedRuntime::generate_dtrace_nmethod( // they require (neglecting out_preserve_stack_slots). int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Calculate the total number of stack slots we will need. diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index cbe960556..43e93806e 100644 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -889,7 +889,9 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, + VMRegPair *regs2, int total_args_passed) { + assert(regs2 == NULL, "not needed on x86"); // We return the amount of VMRegImpl stack slots we need to reserve for all // the arguments NOT counting out_preserve_stack_slots. @@ -1857,7 +1859,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Now figure out where the args must be stored and how much stack space // they require. int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Compute framesize for the wrapper. We need to handlize all oops in // incoming registers @@ -2761,7 +2763,7 @@ nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, // the 1st six register arguments). It's weird see int_stk_helper. int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); // Calculate the total number of stack slots we will need. diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad index 316ebe291..ed76470e0 100644 --- a/src/cpu/x86/vm/x86_32.ad +++ b/src/cpu/x86/vm/x86_32.ad @@ -3755,7 +3755,7 @@ frame %{ // automatically biased by the preserve_stack_slots field above. c_calling_convention %{ // This is obviously always outgoing - (void) SharedRuntime::c_calling_convention(sig_bt, regs, length); + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); %} // Location of C & interpreter return values diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad index f550208e9..c280d049e 100644 --- a/src/cpu/x86/vm/x86_64.ad +++ b/src/cpu/x86/vm/x86_64.ad @@ -2941,7 +2941,7 @@ frame c_calling_convention %{ // This is obviously always outgoing - (void) SharedRuntime::c_calling_convention(sig_bt, regs, length); + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); %} // Location of compiled Java return values. Same as C for now. diff --git a/src/cpu/zero/vm/globals_zero.hpp b/src/cpu/zero/vm/globals_zero.hpp index 71b566fce..c0c6a857b 100644 --- a/src/cpu/zero/vm/globals_zero.hpp +++ b/src/cpu/zero/vm/globals_zero.hpp @@ -43,6 +43,7 @@ define_pd_global(bool, UncommonNullCast, true); define_pd_global(intx, CodeEntryAlignment, 32); define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 1000 ); define_pd_global(intx, PreInflateSpin, 10); define_pd_global(intx, StackYellowPages, 2); diff --git a/src/cpu/zero/vm/sharedRuntime_zero.cpp b/src/cpu/zero/vm/sharedRuntime_zero.cpp index 2c419b148..33b29be3e 100644 --- a/src/cpu/zero/vm/sharedRuntime_zero.cpp +++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp @@ -135,6 +135,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, + VMRegPair *regs2, int total_args_passed) { ShouldNotCallThis(); return 0; diff --git a/src/cpu/zero/vm/shark_globals_zero.hpp b/src/cpu/zero/vm/shark_globals_zero.hpp index c04b225b8..edb7364c8 100644 --- a/src/cpu/zero/vm/shark_globals_zero.hpp +++ b/src/cpu/zero/vm/shark_globals_zero.hpp @@ -50,7 +50,6 @@ define_pd_global(intx, Tier4BackEdgeThreshold, 100000); define_pd_global(intx, OnStackReplacePercentage, 933 ); define_pd_global(intx, FreqInlineSize, 325 ); -define_pd_global(intx, InlineSmallCode, 1000 ); define_pd_global(uintx, NewRatio, 12 ); define_pd_global(intx, NewSizeThreadIncrease, 4*K ); define_pd_global(intx, InitialCodeCacheSize, 160*K); diff --git a/src/share/vm/c1/c1_FrameMap.cpp b/src/share/vm/c1/c1_FrameMap.cpp index f5eef6f0b..331690dcf 100644 --- a/src/share/vm/c1/c1_FrameMap.cpp +++ b/src/share/vm/c1/c1_FrameMap.cpp @@ -133,7 +133,7 @@ CallingConvention* FrameMap::c_calling_convention(const BasicTypeArray* signatur } } - intptr_t out_preserve = SharedRuntime::c_calling_convention(sig_bt, regs, sizeargs); + intptr_t out_preserve = SharedRuntime::c_calling_convention(sig_bt, regs, NULL, sizeargs); LIR_OprList* args = new LIR_OprList(signature->length()); for (i = 0; i < sizeargs;) { BasicType t = sig_bt[i]; diff --git a/src/share/vm/runtime/sharedRuntime.hpp b/src/share/vm/runtime/sharedRuntime.hpp index 5754536b9..8ca5da3b5 100644 --- a/src/share/vm/runtime/sharedRuntime.hpp +++ b/src/share/vm/runtime/sharedRuntime.hpp @@ -356,7 +356,15 @@ class SharedRuntime: AllStatic { const VMRegPair* regs) NOT_DEBUG_RETURN; // Ditto except for calling C - static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed); + // + // C argument in register AND stack slot. + // Some architectures require that an argument must be passed in a register + // AND in a stack slot. These architectures provide a second VMRegPair array + // to be filled by the c_calling_convention method. On other architectures, + // NULL is being passed as the second VMRegPair array, so arguments are either + // passed in a register OR in a stack slot. + static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, + int total_args_passed); // Generate I2C and C2I adapters. These adapters are simple argument marshalling // blobs. Unlike adapters in the tiger and earlier releases the code in these -- GitLab