Skip to content

Commit

Permalink
Stackless issue #181: Replace slp_dont_optimize... vars
Browse files Browse the repository at this point in the history
by more appropriate compiler specific code. The new code should work
with whole program optimisation.

(cherry picked from commit 464ef17bcbf634ae33b37672a5b803f0ce940086)
  • Loading branch information
Anselm Kruis committed Nov 11, 2018
1 parent 49e3ede commit a8cf08d
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 22 deletions.
12 changes: 5 additions & 7 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -4048,9 +4048,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)


#ifdef STACKLESS
/* a global write only dummy variable */
char _dont_optimise_away_slp_eval_frame_functions;

PyObject *
slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
{
Expand All @@ -4060,7 +4057,8 @@ slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
* it serves as a marker whether we expect a value or
* not, and it makes debugging a little easier.
*/
_dont_optimise_away_slp_eval_frame_functions = 1;
SLP_DO_NOT_OPTIMIZE_AWAY((char *)1);

r = slp_eval_frame_value(f, throwflag, retval);
return r;
}
Expand All @@ -4075,7 +4073,7 @@ slp_eval_frame_iter(PyFrameObject *f, int throwflag, PyObject *retval)
* for_iter operation. In this case we need to handle
* null without error as valid result.
*/
_dont_optimise_away_slp_eval_frame_functions = 2;
SLP_DO_NOT_OPTIMIZE_AWAY((char *)2);
r = slp_eval_frame_value(f, throwflag, retval);
return r;
}
Expand All @@ -4090,7 +4088,7 @@ slp_eval_frame_setup_with(PyFrameObject *f, int throwflag, PyObject *retval)
* SETUP_WITH operation.
* NOTE / XXX: see above.
*/
_dont_optimise_away_slp_eval_frame_functions = 3;
SLP_DO_NOT_OPTIMIZE_AWAY((char *)3);
r = slp_eval_frame_value(f, throwflag, retval);
return r;
}
Expand All @@ -4105,7 +4103,7 @@ slp_eval_frame_with_cleanup(PyFrameObject *f, int throwflag, PyObject *retval)
* WITH_CLEANUP operation.
* NOTE / XXX: see above.
*/
_dont_optimise_away_slp_eval_frame_functions = 4;
SLP_DO_NOT_OPTIMIZE_AWAY((char *)4);
r = slp_eval_frame_value(f, throwflag, retval);
return r;
}
Expand Down
15 changes: 7 additions & 8 deletions Stackless/core/slp_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ static PyTaskletObject *_prev;
#define SLP_EVAL
#include "platf/slp_platformselect.h"

SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS

#ifdef EXTERNAL_ASM
/* CCP addition: Make these functions, to be called from assembler.
* The token include file for the given platform should enable the
Expand Down Expand Up @@ -80,8 +82,6 @@ extern int slp_switch(void);

#endif

/* a write only variable used to prevent overly optimisation */
intptr_t *global_goobledigoobs;
static int
climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
PyTaskletObject *prev)
Expand All @@ -96,15 +96,14 @@ climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
intptr_t probe;
register ptrdiff_t needed = &probe - ts->st.cstack_base;
/* in rare cases, the need might have vanished due to the recursion */
register intptr_t *goobledigoobs;
if (needed > 0) {
goobledigoobs = alloca(needed * sizeof(intptr_t));
if (goobledigoobs == NULL)
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
if (stack_ptr_tmp == NULL)
return -1;
/* hinder the compiler to optimise away
goobledigoobs and the alloca call.
/* Prevent the compiler from optimising away
stack_ptr_tmp and the alloca call.
This happens with gcc 4.7.x and -O2 */
global_goobledigoobs = goobledigoobs;
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
}
return slp_transfer(cstprev, cst, prev);
}
Expand Down
9 changes: 6 additions & 3 deletions Stackless/core/stacklesseval.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,14 @@ climb_stack_and_eval_frame(PyFrameObject *f)
intptr_t probe;
ptrdiff_t needed = &probe - ts->st.cstack_base;
/* in rare cases, the need might have vanished due to the recursion */
intptr_t *goobledigoobs;
if (needed > 0) {
goobledigoobs = alloca(needed * sizeof(intptr_t));
if (goobledigoobs == NULL)
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
if (stack_ptr_tmp == NULL)
return NULL;
/* Prevent the compiler from optimising away
stack_ptr_tmp and the alloca call.
This happens with gcc 4.7.x and -O2 */
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
}
return slp_eval_frame(f);
}
Expand Down
3 changes: 1 addition & 2 deletions Stackless/module/scheduling.c
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,6 @@ typedef struct {
/* not a valid ptr and not a common integer */
#define SAVED_TSTATE_MAGIC1 (((intptr_t)transfer_with_exc)+1)
#define SAVED_TSTATE_MAGIC2 (-1*((intptr_t)transfer_with_exc))
saved_tstat_with_magic_t * _dont_optimise_away_saved_tstat_with_magic;

static int
transfer_with_exc(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletObject *prev)
Expand All @@ -376,7 +375,7 @@ transfer_with_exc(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletObject
/* Prevent overly aggressive compiler optimisation.
We pass the address of sm to SLP_DO_NOT_OPTIMIZE_AWAY.
This way the optimizer can't change the layout of the structure. */
_dont_optimise_away_saved_tstat_with_magic = &sm;
SLP_DO_NOT_OPTIMIZE_AWAY(&sm);

sm.s.tracing = ts->tracing;
sm.s.c_profilefunc = ts->c_profilefunc;
Expand Down
49 changes: 47 additions & 2 deletions Stackless/platf/slp_platformselect.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
#elif defined(__GNUC__) && defined(sparc) && defined(sun)
#include "switch_sparc_sun_gcc.h" /* SunOS sparc with gcc */
#elif defined(__GNUC__) && defined(__s390__) && defined(__linux__)
#include "switch_s390_unix.h" /* Linux/S390 */
#include "switch_s390_unix.h" /* Linux/S390 */
#elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__)
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
#elif defined(__GNUC__) && defined(__arm__) && defined(__thumb__)
#include "switch_arm_thumb_gcc.h" /* gcc using arm thumb */
#elif defined(__GNUC__) && defined(__arm32__)
Expand All @@ -32,6 +32,51 @@

/* default definitions if not defined in above files */

/*
* Call SLP_DO_NOT_OPTIMIZE_AWAY(pointer) to ensure that pointer will be
* computed even post-optimization. Use it for pointers that are computed but
* otherwise are useless. The compiler tends to do a good job at eliminating
* unused variables, and this macro fools it into thinking pointer is in fact
* needed.
*
* Each branch below also defines SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS. It
* expands to the file-scope definition the selected implementation needs:
* nothing for MSVC and GCC/clang, the sink variable for unknown compilers.
*
* These are defaults: they are used only if SLP_DO_NOT_OPTIMIZE_AWAY has not
* been defined already (e.g. by one of the platform headers included above).
* NOTE(review): SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS is defined only inside
* this #ifndef, so a platform header that overrides the macro presumably has
* to define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS as well -- confirm.
*/

#ifndef SLP_DO_NOT_OPTIMIZE_AWAY

/* Code is based on Facebook folly
* https://github.com/facebook/folly/blob/master/folly/Benchmark.h,
* which has an Apache 2 license.
*/
#ifdef _MSC_VER

/*
* MSVC: pass the pointer to an empty function. The function is defined
* between #pragma optimize("", off) and #pragma optimize("", on), i.e.
* with optimization switched off for this one definition only.
*/
#pragma optimize("", off)

static inline void doNotOptimizeDependencySink(const void* p) {}

#pragma optimize("", on)

#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) doNotOptimizeDependencySink(pointer)
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */

#elif (defined(__GNUC__) || defined(__clang__))
/*
* GCC / clang: an empty volatile asm statement takes the pointer as its
* only input operand.
* The "r" constraint forces the compiler to make pointer available
* in a register to the asm block, which means that it must have
* computed/loaded it.
*/
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
do {__asm__ volatile("" ::"r"(pointer));} while(0)
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */
#else
/*
* Unknown compiler: store the pointer into a volatile global sink.
* The sink is only declared (extern) here. Its definition is the expansion
* of SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS, which therefore has to be placed
* at file scope in one source file.
* NOTE(review): uint8_t presumably comes from <stdint.h> through an earlier
* include -- confirm. The identifier is spelled "opimize" (sic); the
* spelling is the same at all three places below.
*/
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
do { slp_do_not_opimize_away_sink = ((void*)(pointer)); } while(0)
extern uint8_t* volatile slp_do_not_opimize_away_sink;
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS uint8_t* volatile slp_do_not_opimize_away_sink;
#endif /* compiler selection */
#endif /* !SLP_DO_NOT_OPTIMIZE_AWAY */

/* adjust slots to typical size of a few recursions on your system */

#ifndef CSTACK_SLOTS
Expand Down

0 comments on commit a8cf08d

Please sign in to comment.