From efe26982c5af63b23e3136c94c059332c9838c79 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Tue, 8 Aug 2023 22:11:41 -0400 Subject: [PATCH 01/14] Working on some space optimization --- harmony_model_checker/charm/charm.c | 215 ++++++---------------------- harmony_model_checker/charm/graph.c | 6 +- harmony_model_checker/charm/graph.h | 21 +-- 3 files changed, 55 insertions(+), 187 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 7c7590d7..9035d1f8 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -130,14 +130,6 @@ struct worker { hvalue_t stack[MAX_CONTEXT_STACK]; }; -// One of these per SCC worker thread -struct scc_worker { - struct global *global; // global state - double timeout; - barrier_t *scc_barrier; - void *scc_cache; // for SCC alloc/free -}; - #ifdef CACHE_LINE_ALIGNED #define ALIGNMASK 0x3F #else @@ -371,13 +363,15 @@ static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, str // TODOTODO next->to_parent = edge; +#ifdef notdef next->len = node->len + 1; next->steps = node->steps + edge->nsteps; +#endif next->index = -1; // for Tarjan edge->bwdnext = NULL; - next->next = *results; + next->u.ph1.next = *results; *results = next; w->count++; w->enqueued++; @@ -399,6 +393,7 @@ static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, str edge->fwdnext = *pe; *pe = edge; #else + // TODO. Do we need node->lock or can we just use the lock argument?? if (mutex_try_acquire(node->lock)) { edge->fwdnext = node->fwd; node->fwd = edge; @@ -1562,7 +1557,7 @@ static void path_output_microstep(struct global *global, FILE *file, static void path_output_macrostep(struct global *global, FILE *file, struct macrostep *macro, struct state *oldstate){ fprintf(file, " {\n"); fprintf(file, " \"id\": \"%d\",\n", macro->edge->dst->id); - fprintf(file, " \"len\": \"%d\",\n", macro->edge->dst->len); + // fprintf(file, " \"len\": \"%d\",\n", macro->edge->dst->len); fprintf(file, " \"tid\": \"%d\",\n", macro->tid); fprintf(file, " \"shared\": "); @@ -2008,7 +2003,7 @@ static enum busywait is_stuck( hvalue_t ctx, bool change ) { - if (node->component != start->component) { + if (node->u.ph2.component != start->u.ph2.component) { return BW_ESCAPE; } if (node->visited) { @@ -2063,6 +2058,7 @@ static void detect_busywait(struct minheap *failures, struct node *node){ } } +#ifdef notdef static int node_cmp(void *n1, void *n2){ struct node *node1 = n1, *node2 = n2; @@ -2074,11 +2070,16 @@ static int node_cmp(void *n1, void *n2){ } return node1->id - node2->id; } +#endif static int fail_cmp(void *f1, void *f2){ +#ifdef notdef struct failure *fail1 = f1, *fail2 = f2; return node_cmp(fail1->edge->dst, fail2->edge->dst); +#else + return 0; +#endif } void do_work1(struct worker *w, struct node *node, unsigned int level){ @@ -2636,7 +2637,7 @@ static void worker(void *arg){ assert(node->id == 0); node->id = w->node_id; global->graph.nodes[w->node_id++] = node; - w->results = node->next; + w->results = node->u.ph1.next; w->count--; } assert(w->results == NULL); @@ -2649,7 +2650,7 @@ static void worker(void *arg){ struct stack { struct stack *next; - unsigned int v1; + unsigned int v1; // TODO == v2->src struct edge *v2; } *stack_free; @@ -2678,15 +2679,6 @@ static void stack_pop(struct stack **sp, unsigned int *v1, struct edge **v2) { stack_free = s; } -static struct edge *node_adj(struct node *n, unsigned int pi) { - for (struct edge *e = n->fwd; e != NULL; e = e->fwdnext, pi--) { - if (pi == 0) { - return e; - } - } - return NULL; -} - static void tarjan(struct global *global){ unsigned int i = 0, comp_id = 0; struct stack *stack = NULL; @@ -2696,46 +2688,45 @@ static void tarjan(struct global *global){ if (n->index == -1) { stack_push(&call_stack, v, NULL); while (call_stack != NULL) { - unsigned int v1; - struct edge *e; + unsigned int v1; + struct edge *e; stack_pop(&call_stack, &v1, &e); n = global->graph.nodes[v1]; if (e == NULL) { n->index = i; - n->lowlink = i; + n->u.ph2.lowlink = i; i++; stack_push(&stack, v1, NULL); n->on_stack = true; - e = n->fwd; + e = n->fwd; } else { - if (e->dst->lowlink < n->lowlink) { - n->lowlink = e->dst->lowlink; + if (e->dst->u.ph2.lowlink < n->u.ph2.lowlink) { + n->u.ph2.lowlink = e->dst->u.ph2.lowlink; } - e = e->fwdnext; + e = e->fwdnext; } while (e != NULL) { struct node *w = e->dst; if (w->index < 0) { break; } - if (w->on_stack && w->index < n->lowlink) { - n->lowlink = w->index; + if (w->on_stack && w->index < n->u.ph2.lowlink) { + n->u.ph2.lowlink = w->index; } - e = e->fwdnext; + e = e->fwdnext; } if (e != NULL) { - // printf("TJ2 push %u %u, %u 0\n", v1, pi+1, e->dst->id); stack_push(&call_stack, v1, e); stack_push(&call_stack, e->dst->id, NULL); } - else if (n->lowlink == n->index) { + else if (n->u.ph2.lowlink == n->index) { for (;;) { unsigned int v2; stack_pop(&stack, &v2, NULL); struct node *n2 = global->graph.nodes[v2]; n2->on_stack = false; - n2->component = comp_id; + n2->u.ph2.component = comp_id; if (v2 == v1) { break; } @@ -2746,75 +2737,6 @@ static void tarjan(struct global *global){ } } global->ncomponents = comp_id; - // printf("TARJAN: %u\n", comp_id); - // for (unsigned int i = 0; i < global->graph.size; i++) { - // printf("%3u: %u\n", i, global->graph.nodes[i]->comp_id); - // } -} - -static void scc_worker(void *arg){ - struct scc_worker *w = arg; - struct global *global = w->global; - mutex_acquire(&global->todo_enter); - for (;;) { - if (global->scc_todo == NULL) { - global->scc_nwaiting++; - if (global->scc_nwaiting == global->nworkers) { - mutex_release(&global->todo_wait); - break; - } - mutex_release(&global->todo_enter); - mutex_acquire(&global->todo_wait); - if (global->scc_nwaiting == global->nworkers) { - mutex_release(&global->todo_wait); - break; - } - global->scc_nwaiting--; - } - - // Grab work - unsigned int component = global->ncomponents++; - struct scc *scc = global->scc_todo; - assert(scc != NULL); - global->scc_todo = scc->next; - scc->next = NULL; - - // Split binary semaphore release - if (global->scc_todo != NULL && global->scc_nwaiting > 0) { - mutex_release(&global->todo_wait); - } - else { - mutex_release(&global->todo_enter); - } - - for (;;) { - // Do the work - assert(scc->next == NULL); - scc = graph_find_scc_one(&global->graph, scc, component, &w->scc_cache); - - // Put new work on the list except the last (which we'll do ourselves) - mutex_acquire(&global->todo_enter); - while (scc != NULL && scc->next != NULL) { - struct scc *next = scc->next; - scc->next = global->scc_todo; - global->scc_todo = scc; - scc = next; - } - if (scc == NULL) { // get more work - break; - } - component = global->ncomponents++; - - // Split binary semaphore release - if (global->scc_todo != NULL && global->scc_nwaiting > 0) { - mutex_release(&global->todo_wait); - } - else { - mutex_release(&global->todo_enter); - } - } - } - barrier_wait(w->scc_barrier); } char *state_string(struct state *state){ @@ -3260,11 +3182,10 @@ int main(int argc, char **argv){ global->numa = ((unsigned int) (gettime() * 1000) % 2) == 0; #endif - barrier_t start_barrier, middle_barrier, end_barrier, scc_barrier; + barrier_t start_barrier, middle_barrier, end_barrier; barrier_init(&start_barrier, global->nworkers); barrier_init(&middle_barrier, global->nworkers); barrier_init(&end_barrier, global->nworkers); - barrier_init(&scc_barrier, global->nworkers); // initialize modules mutex_init(&global->inv_lock); @@ -3420,13 +3341,6 @@ int main(int argc, char **argv){ unsigned int worker_index = 0; vproc_tree_alloc(vproc_root, workers, &worker_index, global->nworkers); - struct scc_worker *scc_workers = calloc(global->nworkers, sizeof(*scc_workers)); - for (unsigned int i = 0; i < global->nworkers; i++) { - struct scc_worker *w = &scc_workers[i]; - w->global = global; - w->scc_barrier = &scc_barrier; - } - // Put the state and value dictionaries in concurrent mode dict_set_concurrent(global->values); dict_set_concurrent(visited); @@ -3508,43 +3422,6 @@ int main(int argc, char **argv){ dict_set_sequential(visited); printf("* Phase 3: analysis\n"); - -#ifdef notdef - // Shortest path to initial state (Dijkstra + minheap) - if (global->graph.size > 10000) { - printf("* Phase 3a: shortest path to initial state\n"); - fflush(stdout); - } - struct minheap *shp = minheap_create(node_cmp); - struct node *current = global->graph.nodes[0]; - for (;;) { - for (struct edge *e = current->fwd; e != NULL; e = e->fwdnext) { - struct node *d = e->dst; - assert(e->src == current); - unsigned int weight = 1; - // TODOTODO (current->to_parent == NULL || e->ctx == current->to_parent->after) ? 0 : 1; - unsigned int len = current->len + weight; - unsigned int steps = current->steps + e->nsteps; - if (d->to_parent == NULL) { - d->to_parent = e; - d->len = len; - d->steps = steps; - minheap_insert(shp, d); - } - else if (len < d->len || (d->len == len && steps < e->nsteps)) { - d->to_parent = e; - d->len = len; - d->steps = steps; - minheap_decrease(shp, d); - } - } - if (minheap_empty(shp)) { - break; - } - current = minheap_getmin(shp); - } -#endif - if (minheap_empty(global->failures)) { if (global->graph.size > 10000) { printf("* Phase 3b: strongly connected components\n"); @@ -3553,15 +3430,6 @@ int main(int argc, char **argv){ double now = gettime(); tarjan(global); global->phase2 = true; -#ifdef OBSOLETE - global->scc_todo = scc_alloc(0, global->graph.size, NULL, NULL); - - // Start all but one of the workers, who'll wait on the start barrier - for (unsigned int i = 1; i < global->nworkers; i++) { - thread_create(scc_worker, &scc_workers[i]); - } - scc_worker(&scc_workers[0]); -#endif printf(" * %u components (%.2lf seconds)\n", global->ncomponents, gettime() - now); @@ -3569,7 +3437,7 @@ int main(int argc, char **argv){ printf("digraph Harmony {\n"); for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; - printf(" s%u [label=\"%u/%u\"]\n", i, i, node->component); + printf(" s%u [label=\"%u/%u\"]\n", i, i, node->u.ph2.component); } for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; @@ -3584,8 +3452,8 @@ int main(int argc, char **argv){ struct component *components = calloc(global->ncomponents, sizeof(*components)); for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; - assert(node->component < global->ncomponents); - struct component *comp = &components[node->component]; + assert(node->u.ph2.component < global->ncomponents); + struct component *comp = &components[node->u.ph2.component]; if (comp->size == 0) { comp->rep = node; comp->all_same = value_state_all_eternal(node->state) @@ -3603,7 +3471,7 @@ int main(int argc, char **argv){ // if this component has a way out, it is good for (struct edge *edge = node->fwd; edge != NULL && !comp->good; edge = edge->fwdnext) { - if (edge->dst->component != node->component) { + if (edge->dst->u.ph2.component != node->u.ph2.component) { comp->good = true; break; } @@ -3636,8 +3504,8 @@ int main(int argc, char **argv){ // Look for states in final components for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; - assert(node->component < global->ncomponents); - struct component *comp = &components[node->component]; + assert(node->u.ph2.component < global->ncomponents); + struct component *comp = &components[node->u.ph2.component]; if (comp->final) { node->final = true; if (global->dfa != NULL && @@ -3666,7 +3534,7 @@ int main(int argc, char **argv){ int nbad = 0; for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; - if (!components[node->component].good) { + if (!components[node->u.ph2.component].good) { nbad++; struct failure *f = new_alloc(struct failure); f->type = FAIL_TERMINATION; @@ -3687,7 +3555,7 @@ int main(int argc, char **argv){ } for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; - if (components[node->component].size > 1) { + if (components[node->u.ph2.component].size > 1) { detect_busywait(global->failures, node); } } @@ -3703,8 +3571,7 @@ int main(int argc, char **argv){ else { fprintf(df, "digraph Harmony {\n"); for (unsigned int i = 0; i < global->graph.size; i++) { - struct node *node = global->graph.nodes[i]; - fprintf(df, " s%u [label=\"%u/%u\"]\n", i, i, node->len); + fprintf(df, " s%u [label=\"%u\"]\n", i, i); } for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; @@ -3745,7 +3612,7 @@ int main(int argc, char **argv){ struct node *node = global->graph.nodes[i]; assert(node->id == i); fprintf(df, "\nNode %d:\n", node->id); - fprintf(df, " component: %d\n", node->component); + fprintf(df, " component: %d\n", node->u.ph2.component); if (node->to_parent != NULL) { fprintf(df, " ancestors:"); for (struct node *n = node->to_parent->src;; n = n->to_parent->src) { @@ -3757,7 +3624,7 @@ int main(int argc, char **argv){ fprintf(df, "\n"); } fprintf(df, " vars: %s\n", value_string(node->state->vars)); - fprintf(df, " len: %u %u\n", node->len, node->steps); + // fprintf(df, " len: %u %u\n", node->len, node->steps); if (node->failed) { fprintf(df, " failed\n"); } @@ -3766,7 +3633,7 @@ int main(int argc, char **argv){ for (struct edge *edge = node->fwd; edge != NULL; edge = edge->fwdnext, eno++) { fprintf(df, " %d:\n", eno); struct context *ctx = value_get(edge->ctx, NULL); - fprintf(df, " node: %d (%d)\n", edge->dst->id, edge->dst->component); + fprintf(df, " node: %d (%d)\n", edge->dst->id, edge->dst->u.ph2.component); fprintf(df, " context before: %"PRIx64" pc=%d\n", edge->ctx, ctx->pc); ctx = value_get(edge->after, NULL); fprintf(df, " context after: %"PRIx64" pc=%d\n", edge->after, ctx->pc); @@ -3803,7 +3670,7 @@ int main(int argc, char **argv){ eno = 0; for (struct edge *edge = node->bwd; edge != NULL; edge = edge->bwdnext, eno++) { fprintf(df, " %d:\n", eno); - fprintf(df, " node: %d (%d)\n", edge->src->id, edge->src->component); + fprintf(df, " node: %d (%d)\n", edge->src->id, edge->src->u.ph2.component); struct context *ctx = value_get(edge->ctx, NULL); fprintf(df, " context before: %"PRIx64" %d\n", edge->ctx, ctx->pc); ctx = value_get(edge->after, NULL); @@ -3891,7 +3758,7 @@ int main(int argc, char **argv){ } fprintf(out, " {\n"); fprintf(out, " \"idx\": %d,\n", node->id); - fprintf(out, " \"component\": %d,\n", node->component); + fprintf(out, " \"component\": %d,\n", node->u.ph2.component); #ifdef notdef if (node->parent != NULL) { fprintf(out, " \"parent\": %d,\n", node->parent->id); diff --git a/harmony_model_checker/charm/graph.c b/harmony_model_checker/charm/graph.c index e65c20ee..ce7d97b8 100644 --- a/harmony_model_checker/charm/graph.c +++ b/harmony_model_checker/charm/graph.c @@ -101,7 +101,7 @@ struct scc *graph_find_scc_one(struct graph *graph, struct scc *scc, unsigned in } } if (optim) { - node->component = component; + node->u.ph2.component = component; scc->start++; if (scc->start < scc->finish) { return scc; @@ -120,7 +120,7 @@ struct scc *graph_find_scc_one(struct graph *graph, struct scc *scc, unsigned in swap(graph, start, (finish + start) / 2); } - graph->nodes[start]->component = component; + graph->nodes[start]->u.ph2.component = component; // Phase 1: move all successors of nodes[0] to the bottom unsigned int lo = start + 1; @@ -156,7 +156,7 @@ struct scc *graph_find_scc_one(struct graph *graph, struct scc *scc, unsigned in } if (next->id < lo) { // in SCC if (next->id >= mid) { - next->component = component; + next->u.ph2.component = component; if (next->id > mid) { swap(graph, mid, next->id); } diff --git a/harmony_model_checker/charm/graph.h b/harmony_model_checker/charm/graph.h index 762b8f98..fa009066 100644 --- a/harmony_model_checker/charm/graph.h +++ b/harmony_model_checker/charm/graph.h @@ -56,7 +56,15 @@ enum fail_type { }; struct node { - struct node *next; // for linked list + union { + struct { + struct node *next; // for linked list + } ph1; + struct { + int32_t lowlink; // for Tarjan algorithm + uint32_t component; // strongly connected component id + } ph2; + } u; // Information about state // TODO. state contiguous to this node, so don't need pointer @@ -67,16 +75,9 @@ struct node { struct edge *to_parent; // shortest path to initial state uint32_t id; // nodes are numbered starting from 0 - uint32_t component; // strongly connected component id - uint16_t len; // length of path to initial state - uint16_t steps; // #microsteps from root - - // For Tarjan SCC - int32_t index; - int32_t lowlink; - // uint32_t comp_id; // strongly connected component id - bool on_stack : 1; + int32_t index; // for Tarjan algorithm + bool on_stack : 1; // for Tarjan bool initialized : 1; // this node structure has been initialized bool failed : 1; // a thread has failed bool final : 1; // only eternal threads left (TODO: need this?) From bb02aca0f940b5eea4df66af025ac131dfc1b5d2 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 13:06:59 -0400 Subject: [PATCH 02/14] Working on mem footprint --- harmony_model_checker/charm/charm.c | 79 ++++++++++++++--------------- harmony_model_checker/charm/graph.h | 11 ++-- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 9035d1f8..3d97f9c7 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -274,19 +274,18 @@ bool predicate_check(struct global *global, struct state *sc, struct step *step) // Returns 0 if there are no issues, or the pc of the invariant if it failed. unsigned int check_invariants(struct global *global, struct node *node, struct node *before, struct step *step){ - struct state *state = node->state; - assert(node->state != NULL); + struct state *state = node_state(node); assert(state != NULL); assert(step->ctx->sp == 0); // pre == 0 means it is a non-initialized state. hvalue_t args[2]; // (pre, post) - if (before->state->pre == 0) { + if (node_state(before)->pre == 0) { args[0] = state->vars; } else { - args[0] = before->state->pre; + args[0] = node_state(before)->pre; } args[1] = state->vars; @@ -321,8 +320,7 @@ unsigned int check_invariants(struct global *global, struct node *node, // Returns 0 if there are no issues, or the pc of the finally predicate if it failed. unsigned int check_finals(struct global *global, struct node *node, struct step *step){ - struct state *state = node->state; - assert(node->state != NULL); + struct state *state = node_state(node); assert(state != NULL); // Check each finally predicate @@ -350,16 +348,15 @@ unsigned int check_finals(struct global *global, struct node *node, struct step static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, struct node **results) { struct node *node = edge->src, *next = edge->dst; - struct state *state = (struct state *) &next[1]; // mutex_acquire(lock); bool initialized = next->initialized; if (!initialized) { next->initialized = true; - next->state = state; // TODO. Don't technically need this + // next->state = state; // TODO. Don't technically need this next->failed = edge->failed; - next->lock = lock; + next->u.ph1.lock = lock; // TODOTODO next->to_parent = edge; @@ -393,11 +390,10 @@ static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, str edge->fwdnext = *pe; *pe = edge; #else - // TODO. Do we need node->lock or can we just use the lock argument?? - if (mutex_try_acquire(node->lock)) { + if (mutex_try_acquire(node->u.ph1.lock)) { edge->fwdnext = node->fwd; node->fwd = edge; - mutex_release(node->lock); + mutex_release(node->u.ph1.lock); } else { struct edge **pe = &w->edges[node->id % w->nworkers]; @@ -466,8 +462,7 @@ static bool onestep( int multiplicity, // #contexts that are in the current state struct node **results // where to place the resulting new states ) { - assert(node->state == (struct state *) &node[1]); - assert(state_size(sc) == state_size(node->state)); + assert(state_size(sc) == state_size(node_state(node))); assert(!step->ctx->terminated); assert(!step->ctx->failed); @@ -785,7 +780,7 @@ static bool onestep( // also keep track of "pre" state. if (choosing) { sc->choosing = after; - sc->pre = global->inv_pre ? node->state->pre : sc->vars; + sc->pre = global->inv_pre ? node_state(node)->pre : sc->vars; } else { sc->pre = sc->vars; @@ -864,7 +859,7 @@ static void make_step( step.engine.values = w->global->values; // Make a copy of the state - unsigned int statesz = state_size(node->state); + unsigned int statesz = state_size(node_state(node)); // Room to grown in copy for op_Spawn #ifdef HEAP_ALLOC char *copy = malloc(statesz + 64*sizeof(hvalue_t)); @@ -872,7 +867,7 @@ static void make_step( char copy[statesz + 64*sizeof(hvalue_t)]; #endif struct state *sc = (struct state *) copy; - memcpy(sc, node->state, statesz); + memcpy(sc, node_state(node), statesz); assert(step.engine.allocator == &w->allocator); // Make a copy of the context @@ -891,14 +886,14 @@ static void make_step( assert(step.engine.allocator == &w->allocator); if (!succ) { // ran into an infinite loop step.nlog = 0; - memcpy(sc, node->state, statesz); + memcpy(sc, node_state(node), statesz); memcpy(&w->ctx, cc, size); assert(step.engine.allocator == &w->allocator); (void) onestep(w, node, sc, ctx, &step, choice, true, true, multiplicity, results); assert(step.engine.allocator == &w->allocator); } - memcpy(sc, node->state, statesz); + memcpy(sc, node_state(node), statesz); memcpy(&w->ctx, cc, size); assert(step.engine.allocator == &w->allocator); } @@ -908,7 +903,7 @@ static void make_step( assert(step.engine.allocator == &w->allocator); if (!succ) { // ran into an infinite loop step.nlog = 0; - memcpy(sc, node->state, statesz); + memcpy(sc, node_state(node), statesz); memcpy(&w->ctx, cc, size); assert(step.engine.allocator == &w->allocator); (void) onestep(w, node, sc, ctx, &step, choice, false, true, multiplicity, results); @@ -921,10 +916,10 @@ static void make_step( } char *ctx_status(struct node *node, hvalue_t ctx) { - if (node->state->choosing == ctx) { + if (node_state(node)->choosing == ctx) { return "choosing"; } - while (node->state->choosing != 0) { + while (node_state(node)->choosing != 0) { node = node->to_parent->src; } struct edge *edge; @@ -1326,7 +1321,7 @@ void path_recompute(struct global *global){ struct node *node = global->graph.nodes[0]; struct state *sc = calloc(1, sizeof(struct state) + MAX_CONTEXT_BAG * (sizeof(hvalue_t) + 1)); - memcpy(sc, node->state, state_size(node->state)); + memcpy(sc, node_state(node), state_size(node_state(node))); for (unsigned int i = 0; i < global->nmacrosteps; i++) { struct macrostep *macro = global->macrosteps[i]; @@ -1617,7 +1612,7 @@ static void path_output_macrostep(struct global *global, FILE *file, struct macr fprintf(file, "\n ],\n"); fprintf(file, " \"ctxbag\": {\n"); - struct state *state = macro->edge->dst->state; + struct state *state = node_state(macro->edge->dst); for (unsigned int i = 0; i < state->bagsize; i++) { if (i > 0) { fprintf(file, ",\n"); @@ -2009,7 +2004,7 @@ static enum busywait is_stuck( if (node->visited) { return BW_VISITED; } - change = change || (node->state->vars != start->state->vars); + change = change || (node_state(node)->vars != node_state(start)->vars); node->visited = true; enum busywait result = BW_ESCAPE; for (struct edge *edge = node->fwd; edge != NULL; edge = edge->fwdnext) { @@ -2047,8 +2042,8 @@ static enum busywait is_stuck( } static void detect_busywait(struct minheap *failures, struct node *node){ - for (unsigned int i = 0; i < node->state->bagsize; i++) { - if (is_stuck(node, node, state_contexts(node->state)[i], false) == BW_RETURN) { + for (unsigned int i = 0; i < node_state(node)->bagsize; i++) { + if (is_stuck(node, node, state_contexts(node_state(node))[i], false) == BW_RETURN) { struct failure *f = new_alloc(struct failure); f->type = FAIL_BUSYWAIT; f->edge = node->to_parent; @@ -2086,7 +2081,7 @@ void do_work1(struct worker *w, struct node *node, unsigned int level){ if (node->failed) { return; } - struct state *state = node->state; + struct state *state = node_state(node); if (state->choosing != 0) { assert(VALUE_TYPE(state->choosing) == VALUE_CONTEXT); @@ -3350,8 +3345,8 @@ int main(int argc, char **argv){ struct dict_assoc *hn = dict_find_lock(visited, &workers[0].allocator, state, state_size(state), NULL, &lock); struct node *node = (struct node *) &hn[1]; memset(node, 0, sizeof(*node)); - node->state = (struct state *) &node[1]; - node->lock = lock; + // node->state = (struct state *) &node[1]; + node->u.ph1.lock = lock; mutex_release(lock); node->index = -1; graph_add(&global->graph, node); @@ -3456,12 +3451,12 @@ int main(int argc, char **argv){ struct component *comp = &components[node->u.ph2.component]; if (comp->size == 0) { comp->rep = node; - comp->all_same = value_state_all_eternal(node->state) - && value_ctx_all_eternal(node->state->stopbag); + comp->all_same = value_state_all_eternal(node_state(node)) + && value_ctx_all_eternal(node_state(node)->stopbag); } - else if (node->state->vars != comp->rep->state->vars || - !value_state_all_eternal(node->state) || - !value_ctx_all_eternal(node->state->stopbag)) { + else if (node_state(node)->vars != node_state(comp->rep)->vars || + !value_state_all_eternal(node_state(node)) || + !value_ctx_all_eternal(node_state(node)->stopbag)) { comp->all_same = false; } comp->size++; @@ -3509,7 +3504,7 @@ int main(int argc, char **argv){ if (comp->final) { node->final = true; if (global->dfa != NULL && - !dfa_is_final(global->dfa, node->state->dfa_state)) { + !dfa_is_final(global->dfa, node_state(node)->dfa_state)) { struct failure *f = new_alloc(struct failure); f->type = FAIL_BEHAVIOR; f->edge = node->to_parent; @@ -3576,7 +3571,7 @@ int main(int argc, char **argv){ for (unsigned int i = 0; i < global->graph.size; i++) { struct node *node = global->graph.nodes[i]; for (struct edge *edge = node->fwd; edge != NULL; edge = edge->fwdnext) { - struct state *state = node->state; + struct state *state = node_state(node); unsigned int j; for (j = 0; j < state->bagsize; j++) { if (state_contexts(state)[j] == edge->ctx) { @@ -3623,7 +3618,7 @@ int main(int argc, char **argv){ } fprintf(df, "\n"); } - fprintf(df, " vars: %s\n", value_string(node->state->vars)); + fprintf(df, " vars: %s\n", value_string(node_state(node)->vars)); // fprintf(df, " len: %u %u\n", node->len, node->steps); if (node->failed) { fprintf(df, " failed\n"); @@ -3763,8 +3758,8 @@ int main(int argc, char **argv){ if (node->parent != NULL) { fprintf(out, " \"parent\": %d,\n", node->parent->id); } - char *val = json_escape_value(node->state->vars); - fprintf(out, " \"value\": \"%s:%d\",\n", val, node->state->choosing != 0); + char *val = json_escape_value(node_state(node)->vars); + fprintf(out, " \"value\": \"%s:%d\",\n", val, node_state(node)->choosing != 0); free(val); #endif print_transitions(out, symbols, node->fwd); @@ -3874,8 +3869,8 @@ int main(int argc, char **argv){ inv_ctx->readonly = 1; hvalue_t args[2]; - args[0] = bad->edge->src->state->vars; - args[1] = bad->edge->dst->state->vars; + args[0] = node_state(bad->edge->src)->vars; + args[1] = node_state(bad->edge->dst)->vars; value_ctx_push(inv_ctx, value_put_list(&engine, args, sizeof(args))); hvalue_t inv_context = value_put_context(&engine, inv_ctx); diff --git a/harmony_model_checker/charm/graph.h b/harmony_model_checker/charm/graph.h index fa009066..840cfecc 100644 --- a/harmony_model_checker/charm/graph.h +++ b/harmony_model_checker/charm/graph.h @@ -57,21 +57,22 @@ enum fail_type { struct node { union { + // Data we only need while creating the Kripke structure struct { struct node *next; // for linked list + ht_lock_t *lock; // lock for forward edges } ph1; + // Data we only need when analyzing the Kripke structure struct { - int32_t lowlink; // for Tarjan algorithm + int32_t lowlink; // for Tarjan SCC algorithm uint32_t component; // strongly connected component id } ph2; } u; // Information about state - // TODO. state contiguous to this node, so don't need pointer - struct state *state; // state corresponding to this node + // struct state *state; // state corresponding to this node struct edge *bwd; // backward edges struct edge *fwd; // forward edges - ht_lock_t *lock; // lock for forward edges struct edge *to_parent; // shortest path to initial state uint32_t id; // nodes are numbered starting from 0 @@ -87,6 +88,8 @@ struct node { bool reachable : 1; }; +#define node_state(n) ((struct state *) &(n)[1]) + struct failure { struct failure *next; // for linked list maintenance enum fail_type type; // failure type From 894b0e7ee5f7e1db10db70fd0fd89cd85bfb44df Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 15:03:50 -0400 Subject: [PATCH 03/14] Working on faster stack --- harmony_model_checker/charm/charm.c | 204 +++++++++++++++------------- harmony_model_checker/charm/graph.c | 158 --------------------- harmony_model_checker/charm/graph.h | 5 - harmony_model_checker/charm/head.h | 2 +- 4 files changed, 108 insertions(+), 261 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 3d97f9c7..618e259a 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -354,6 +354,7 @@ static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, str bool initialized = next->initialized; if (!initialized) { next->initialized = true; + next->reachable = true; // next->state = state; // TODO. Don't technically need this next->failed = edge->failed; next->u.ph1.lock = lock; @@ -367,18 +368,11 @@ static void process_edge(struct worker *w, struct edge *edge, mutex_t *lock, str next->index = -1; // for Tarjan - edge->bwdnext = NULL; next->u.ph1.next = *results; *results = next; w->count++; w->enqueued++; } - else { - assert(next->failed == edge->failed); - edge->bwdnext = next->bwd; - } - - next->bwd = edge; mutex_release(lock); @@ -2643,55 +2637,84 @@ static void worker(void *arg){ } } +#define STACK_CHUNK 3 + +// The stack contains pointers to either nodes or edges. +// Which of the two it is is captured in the lowest bit. +// Memory space is at premium here... struct stack { - struct stack *next; - unsigned int v1; // TODO == v2->src - struct edge *v2; -} *stack_free; + struct stack *next, *prev; + void *ptrs[STACK_CHUNK]; // low bit = 1 --> edge + unsigned int sp; +}; -static void stack_push(struct stack **sp, unsigned int v1, struct edge *v2) { - struct stack *s = stack_free; - if (s == NULL) { - s = malloc(sizeof(*s)); +static void stack_push(struct stack **sp, struct node *v1, struct edge *v2) { + // See if there's space in the current chunk + struct stack *s = *sp; + if (s->sp == STACK_CHUNK) { + if (s->next == NULL) { + s->next = malloc(sizeof(*s)); + s->next->prev = s; + s = s->next; + s->sp = 0; + s->next = NULL; + } + else { + s = s->next; + assert(s->sp == 0); + } + *sp = s; + } + + // Push either a node or edge pointer + if (v2 != NULL) { + assert(v1 == v2->src); + s->ptrs[s->sp++] = (char *) v2 + 1; } else { - stack_free = s->next; + s->ptrs[s->sp++] = v1; } - s->next = *sp; - s->v1 = v1; - s->v2 = v2; - *sp = s; } -static void stack_pop(struct stack **sp, unsigned int *v1, struct edge **v2) { +static void stack_pop(struct stack **sp, struct node **v1, struct edge **v2) { + // If the current chunk is empty, go to the previous one struct stack *s = *sp; - *sp = s->next; - *v1 = s->v1; - if (v2 != NULL) { - *v2 = s->v2; + if (s->sp == 0) { + s = s->prev; + assert(s->sp == STACK_CHUNK); + *sp = s; + } + + void *ptr = s->ptrs[--s->sp]; + if ((hvalue_t) ptr & 1) { // edge + *v2 = (struct edge *) ((char *) ptr - 1); + *v1 = (*v2)->src; + } + else { // node + *v1 = ptr; + if (v2 != NULL) { + *v2 = NULL; + } } - s->next = stack_free; - stack_free = s; } +// Tarjan SCC algorithm static void tarjan(struct global *global){ unsigned int i = 0, comp_id = 0; - struct stack *stack = NULL; - struct stack *call_stack = NULL; + struct stack *stack = calloc(1, sizeof(*stack)); + struct stack *call_stack = calloc(1, sizeof(*call_stack)); for (unsigned int v = 0; v < 1 /*global->graph.size*/; v++) { struct node *n = global->graph.nodes[v]; if (n->index == -1) { - stack_push(&call_stack, v, NULL); + stack_push(&call_stack, n, NULL); while (call_stack != NULL) { - unsigned int v1; struct edge *e; - stack_pop(&call_stack, &v1, &e); - n = global->graph.nodes[v1]; + stack_pop(&call_stack, &n, &e); if (e == NULL) { n->index = i; n->u.ph2.lowlink = i; i++; - stack_push(&stack, v1, NULL); + stack_push(&stack, n, NULL); n->on_stack = true; e = n->fwd; } @@ -2712,17 +2735,16 @@ static void tarjan(struct global *global){ e = e->fwdnext; } if (e != NULL) { - stack_push(&call_stack, v1, e); - stack_push(&call_stack, e->dst->id, NULL); + stack_push(&call_stack, n, e); + stack_push(&call_stack, e->dst, NULL); } else if (n->u.ph2.lowlink == n->index) { for (;;) { - unsigned int v2; - stack_pop(&stack, &v2, NULL); - struct node *n2 = global->graph.nodes[v2]; + struct node *n2; + stack_pop(&stack, &n2, NULL); n2->on_stack = false; n2->u.ph2.component = comp_id; - if (v2 == v1) { + if (n2 == n) { break; } } @@ -2751,57 +2773,63 @@ char *state_string(struct state *state){ // This routine removes all nodes that have a single incoming edge and it's // an "epsilon" edge (empty print log). These are essentially useless nodes. -// Typically about half of the nodes can be removed this way. static void destutter1(struct global *global){ struct graph *graph = &global->graph; + + // If nothing got printed, we can just return a single node if (!global->printed_something) { - global->graph.size = 1; - struct node *n = global->graph.nodes[0]; + graph->size = 1; + struct node *n = graph->nodes[0]; n->final = 1; - n->fwd = n->bwd = NULL; + n->fwd = NULL; } - for (unsigned int i = 0; i < graph->size; i++) { - struct node *n = graph->nodes[i]; - if (n->bwd != NULL && n->bwd->bwdnext == NULL && n->bwd->nlog == 0) { - struct node *parent = n->bwd->src; - - if (n->final) { - parent->final = true; - } +#ifdef OBSOLETE + graph->nodes[0]->reachable = true; + int ndropped = 0; + for (unsigned int i = 0; i < graph->size; i++) { + struct node *parent = graph->nodes[i]; + struct edge **pe, *e; + for (pe = &parent->fwd; (e = *pe) != NULL;) { + struct node *n = e->dst; + if (e->nlog == 0) { // epsilon edge (no prints) + assert(n->bwd != NULL); + if (n->bwd->bwdnext == NULL) { + assert(n->bwd == e); + + if (n->final) { + parent->final = true; + } - // Remove the edge from the parent - struct edge **pe, *e; - for (pe = &parent->fwd; (e = *pe) != NULL; pe = &e->fwdnext) { - if (e->dst == n && e->nlog == 0) { + // Remove the edge *pe = e->fwdnext; + n->bwd = NULL; // free(e); - break; - } - } - struct edge *next; - for (struct edge *e = n->fwd; e != NULL; e = next) { - // Move the outgoing edge to the parent. - next = e->fwdnext; - e->fwdnext = parent->fwd; - parent->fwd = e; + ndropped++; - // Fix the corresponding backwards edge - for (struct edge *f = e->dst->bwd; f != NULL; f = f->bwdnext) { - if (f->src == n && f->nlog == e->nlog && - memcmp(edge_log(f), edge_log(e), f->nlog * sizeof(hvalue_t)) == 0) { - f->src = parent; - break; + // Move the outgoing edges to the parent. + while ((e = n->fwd) != NULL) { + n->fwd = e->fwdnext; + e->fwdnext = *pe; + *pe = e; + e->src = parent; } + n->reachable = false; + } + else { + n->reachable = true; + pe = &e->fwdnext; } } - n->reachable = false; - } - else { - n->reachable = true; + else { + n->reachable = true; + pe = &e->fwdnext; + } } } + // printf("DROPPED %d / %d\n", ndropped, graph->size); +#endif // OBSOLETE } static struct dict *collect_symbols(struct graph *graph){ @@ -3348,6 +3376,7 @@ int main(int argc, char **argv){ // node->state = (struct state *) &node[1]; node->u.ph1.lock = lock; mutex_release(lock); + node->reachable = true; node->index = -1; graph_add(&global->graph, node); @@ -3661,28 +3690,6 @@ int main(int argc, char **argv){ } } } - fprintf(df, " bwd:\n"); - eno = 0; - for (struct edge *edge = node->bwd; edge != NULL; edge = edge->bwdnext, eno++) { - fprintf(df, " %d:\n", eno); - fprintf(df, " node: %d (%d)\n", edge->src->id, edge->src->u.ph2.component); - struct context *ctx = value_get(edge->ctx, NULL); - fprintf(df, " context before: %"PRIx64" %d\n", edge->ctx, ctx->pc); - ctx = value_get(edge->after, NULL); - fprintf(df, " context after: %"PRIx64" %d\n", edge->after, ctx->pc); - if (edge->choice != 0) { - fprintf(df, " choice: %s\n", value_string(edge->choice)); - } - if (edge->nlog > 0) { - fprintf(df, " log:"); - for (int j = 0; j < edge->nlog; j++) { - char *p = value_string(edge_log(edge)[j]); - fprintf(df, " %s", p); - free(p); - } - fprintf(df, "\n"); - } - } } fclose(df); } @@ -3728,6 +3735,9 @@ int main(int argc, char **argv){ json_dump(jv, out, 2); fprintf(out, ",\n"); + // Reduce the output graph by removing nodes with only + // one incoming edge that is an epsilon edge + // destutter1(global); // Output the symbols; diff --git a/harmony_model_checker/charm/graph.c b/harmony_model_checker/charm/graph.c index ce7d97b8..fb2b7fca 100644 --- a/harmony_model_checker/charm/graph.c +++ b/harmony_model_checker/charm/graph.c @@ -40,164 +40,6 @@ unsigned int graph_add_multiple(struct graph *graph, unsigned int n) { return node_id; } -static void inline swap(struct graph *graph, unsigned int x, unsigned int y){ - struct node *tmp = graph->nodes[x]; - graph->nodes[x] = graph->nodes[y]; - graph->nodes[y] = tmp; - graph->nodes[x]->id = x; - graph->nodes[y]->id = y; -} - -struct scc *scc_alloc(unsigned int start, unsigned int finish, struct scc *next, void **scc_cache){ - struct scc *scc; - - if (scc_cache == NULL) { - scc = new_alloc(struct scc); - } - else if ((scc = *scc_cache) == NULL) { - scc = malloc(256 * sizeof(*scc)); - for (int i = 1; i < 256; i++) { - * (void **) &scc[i] = *scc_cache; - *scc_cache = &scc[i]; - } - } - else { - *scc_cache = * (void **) scc; - } - scc->start = start; - scc->finish = finish; - scc->next = next; - return scc; -} - -// Partition the range scc->start to scc->finish up into four chunks: the nodes in the strongly -// connected component holding node scc->start, the remaining successors of the node, the remaining -// nodes minus the predecessors and the successors, and finally the predecessors minus the nodes -// in the strongly connected component. Then iteratively visit the last three partitions. -struct scc *graph_find_scc_one(struct graph *graph, struct scc *scc, unsigned int component, void **scc_cache) { - unsigned int start = scc->start; - unsigned int finish = scc->finish; - assert(start < finish); - struct scc *next_scc = scc->next; - - // Optimization. See if this node has either no incoming or - // no outgoing edges. If so, it's a component in its own right - bool optim = true; - struct node *node = graph->nodes[start]; - for (struct edge *e = node->fwd; e != NULL; e = e->fwdnext) { - struct node *next = e->dst; - if (next->id >= start && next->id < finish) { - optim = false; - break; - } - } - if (!optim) { - optim = true; - for (struct edge *e = node->bwd; e != NULL; e = e->bwdnext) { - struct node *next = e->dst; - if (next->id >= start && next->id < finish) { - optim = false; - } - } - } - if (optim) { - node->u.ph2.component = component; - scc->start++; - if (scc->start < scc->finish) { - return scc; - } - * (void **) scc = *scc_cache; - *scc_cache = scc; - return next_scc; - } - - * (void **) scc = *scc_cache; - *scc_cache = scc; - scc = next_scc; - - // Better balancing? - if (finish - start > 100) { - swap(graph, start, (finish + start) / 2); - } - - graph->nodes[start]->u.ph2.component = component; - - // Phase 1: move all successors of nodes[0] to the bottom - unsigned int lo = start + 1; - for (unsigned int i = start; i < lo; i++) { - struct node *node = graph->nodes[i]; - for (struct edge *e = node->fwd; e != NULL; e = e->fwdnext) { - struct node *next = e->dst; - if (next->id < start || next->id >= finish) { - continue; - } - if (next->id < lo) { - continue; - } - if (next->id > lo) { - swap(graph, lo, next->id); - } - lo++; - assert(lo <= finish); - } - } - - unsigned int hi = finish - 1; - unsigned int mid = start + 1; - - // Phase 2: move all precedessors - for (unsigned int i = start, j = hi; i < mid || j > hi;) { - bool in_scc = i < mid; - struct node *node = in_scc ? graph->nodes[i] : graph->nodes[j]; - for (struct edge *e = node->bwd; e != NULL; e = e->bwdnext) { - struct node *next = e->src; - if (next->id < start || next->id >= finish) { - continue; - } - if (next->id < lo) { // in SCC - if (next->id >= mid) { - next->u.ph2.component = component; - if (next->id > mid) { - swap(graph, mid, next->id); - } - mid++; - assert(mid <= lo); - } - } - else { - if (next->id <= hi) { - if (next->id < hi) { - swap(graph, hi, next->id); - } - hi--; - assert(hi >= start); - } - } - } - if (in_scc) { i++; } else { j--; } - } - if (mid < lo) { // predecessors - scc - scc = scc_alloc(mid, lo, scc, scc_cache); - } - if (lo <= hi) { // rest - scc = scc_alloc(lo, hi + 1, scc, scc_cache); - } - if (hi + 1 < finish) { // successors - scc - scc = scc_alloc(hi + 1, finish, scc, scc_cache); - } - return scc; -} - -unsigned int graph_find_scc(struct graph *graph) { - struct scc *scc = scc_alloc(0, graph->size, NULL, NULL); - unsigned int count = 0; - while (scc != NULL) { - scc = graph_find_scc_one(graph, scc, count, NULL); - count++; - } - return count; -} - bool graph_edge_conflict( struct minheap *warnings, struct engine *engine, diff --git a/harmony_model_checker/charm/graph.h b/harmony_model_checker/charm/graph.h index 840cfecc..36747520 100644 --- a/harmony_model_checker/charm/graph.h +++ b/harmony_model_checker/charm/graph.h @@ -27,7 +27,6 @@ struct access_info { struct edge { struct edge *fwdnext; // forward linked list maintenance - struct edge *bwdnext; // backward linked list maintenance hvalue_t ctx, choice; // ctx that made the microstep, choice if any struct node *src; // source node struct node *dst; // destination node @@ -69,11 +68,7 @@ struct node { } ph2; } u; - // Information about state - // struct state *state; // state corresponding to this node - struct edge *bwd; // backward edges struct edge *fwd; // forward edges - struct edge *to_parent; // shortest path to initial state uint32_t id; // nodes are numbered starting from 0 int32_t index; // for Tarjan algorithm diff --git a/harmony_model_checker/charm/head.h b/harmony_model_checker/charm/head.h index ee524a2f..c1c85a24 100644 --- a/harmony_model_checker/charm/head.h +++ b/harmony_model_checker/charm/head.h @@ -1,4 +1,4 @@ -// #undef NDEBUG +#undef NDEBUG #ifdef _WIN32 #define HEAP_ALLOC From d55c129227efaab44e70add06ee825732204a4d1 Mon Sep 17 00:00:00 2001 From: Robbert Van Renesse Date: Wed, 9 Aug 2023 15:14:02 -0400 Subject: [PATCH 04/14] Fast stack for Tarjan --- harmony_model_checker/charm/charm.c | 10 ++++++++-- harmony_model_checker/charm/head.h | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 618e259a..1cb8a349 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -2637,7 +2637,7 @@ static void worker(void *arg){ } } -#define STACK_CHUNK 3 +#define STACK_CHUNK 4096 // The stack contains pointers to either nodes or edges. // Which of the two it is is captured in the lowest bit. @@ -2660,6 +2660,7 @@ static void stack_push(struct stack **sp, struct node *v1, struct edge *v2) { s->next = NULL; } else { + assert(s->next->prev == s); s = s->next; assert(s->sp == 0); } @@ -2680,6 +2681,7 @@ static void stack_pop(struct stack **sp, struct node **v1, struct edge **v2) { // If the current chunk is empty, go to the previous one struct stack *s = *sp; if (s->sp == 0) { + assert(s->prev->next == s); s = s->prev; assert(s->sp == STACK_CHUNK); *sp = s; @@ -2698,6 +2700,10 @@ static void stack_pop(struct stack **sp, struct node **v1, struct edge **v2) { } } +static inline bool stack_empty(struct stack *s) { + return s->prev == NULL && s->sp == 0; +} + // Tarjan SCC algorithm static void tarjan(struct global *global){ unsigned int i = 0, comp_id = 0; @@ -2707,7 +2713,7 @@ static void tarjan(struct global *global){ struct node *n = global->graph.nodes[v]; if (n->index == -1) { stack_push(&call_stack, n, NULL); - while (call_stack != NULL) { + while (!stack_empty(call_stack)) { struct edge *e; stack_pop(&call_stack, &n, &e); if (e == NULL) { diff --git a/harmony_model_checker/charm/head.h b/harmony_model_checker/charm/head.h index c1c85a24..ee524a2f 100644 --- a/harmony_model_checker/charm/head.h +++ b/harmony_model_checker/charm/head.h @@ -1,4 +1,4 @@ -#undef NDEBUG +// #undef NDEBUG #ifdef _WIN32 #define HEAP_ALLOC From 2ffeecdc47f41986b9e2cb9c679e04c5997f9c23 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 15:19:04 -0400 Subject: [PATCH 05/14] Latest --- harmony_model_checker/charm/charm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 1cb8a349..645d21a6 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -2677,7 +2677,7 @@ static void stack_push(struct stack **sp, struct node *v1, struct edge *v2) { } } -static void stack_pop(struct stack **sp, struct node **v1, struct edge **v2) { +static struct node *stack_pop(struct stack **sp, struct edge **v2) { // If the current chunk is empty, go to the previous one struct stack *s = *sp; if (s->sp == 0) { @@ -2690,13 +2690,13 @@ static void stack_pop(struct stack **sp, struct node **v1, struct edge **v2) { void *ptr = s->ptrs[--s->sp]; if ((hvalue_t) ptr & 1) { // edge *v2 = (struct edge *) ((char *) ptr - 1); - *v1 = (*v2)->src; + return (*v2)->src; } else { // node - *v1 = ptr; if (v2 != NULL) { *v2 = NULL; } + return ptr; } } @@ -2715,7 +2715,7 @@ static void tarjan(struct global *global){ stack_push(&call_stack, n, NULL); while (!stack_empty(call_stack)) { struct edge *e; - stack_pop(&call_stack, &n, &e); + n = stack_pop(&call_stack, &e); if (e == NULL) { n->index = i; n->u.ph2.lowlink = i; @@ -2747,7 +2747,7 @@ static void tarjan(struct global *global){ else if (n->u.ph2.lowlink == n->index) { for (;;) { struct node *n2; - stack_pop(&stack, &n2, NULL); + n2 = stack_pop(&stack, NULL); n2->on_stack = false; n2->u.ph2.component = comp_id; if (n2 == n) { From fbff5f0ce931f15417968b101693fcd8e7281844 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 15:22:35 -0400 Subject: [PATCH 06/14] Added comment --- harmony_model_checker/charm/value.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/harmony_model_checker/charm/value.h b/harmony_model_checker/charm/value.h index ad35a611..be310292 100644 --- a/harmony_model_checker/charm/value.h +++ b/harmony_model_checker/charm/value.h @@ -9,6 +9,10 @@ #define MAX_CONTEXT_STACK 250 // maximum size of context stack #define MAX_CONTEXT_BAG 32 // maximum number of distinct contexts +// TODO. State can be reduced in size in various ways; +// - pre and stopbag are not always used +// - choosing could be an index into the context bag +// - the entire thing could be replaced with a collision-resistant hash typedef struct state { hvalue_t vars; // shared variables hvalue_t pre; // "pre" state (same as vars in non-choosing states) From 64f8bbb118f0b9b2abd5a4d949ef4e61eded858a Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 16:06:04 -0400 Subject: [PATCH 07/14] Latest --- harmony_model_checker/charm/graph.h | 1 + 1 file changed, 1 insertion(+) diff --git a/harmony_model_checker/charm/graph.h b/harmony_model_checker/charm/graph.h index 36747520..95b798b6 100644 --- a/harmony_model_checker/charm/graph.h +++ b/harmony_model_checker/charm/graph.h @@ -36,6 +36,7 @@ struct edge { uint16_t multiplicity; // multiplicity of context bool interrupt : 1; // set if state change is an interrupt // TODO. Is choosing == (choice != 0)? + // Also, edge->src->choosing is probably the same bool choosing : 1; // destination state is choosing bool failed : 1; // context failed uint16_t nlog : 12; // size of print history From bbbb09077098b0d40865cff39be046f0794d300a Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 22:28:31 -0400 Subject: [PATCH 08/14] Latest --- harmony_model_checker/charm/graph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmony_model_checker/charm/graph.h b/harmony_model_checker/charm/graph.h index 95b798b6..02c68df9 100644 --- a/harmony_model_checker/charm/graph.h +++ b/harmony_model_checker/charm/graph.h @@ -70,7 +70,7 @@ struct node { } u; struct edge *fwd; // forward edges - struct edge *to_parent; // shortest path to initial state + struct edge *to_parent; // a path to initial state uint32_t id; // nodes are numbered starting from 0 int32_t index; // for Tarjan algorithm From a293db31be28ef21832cc96a5fbd918bef96ecd9 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Wed, 9 Aug 2023 22:29:36 -0400 Subject: [PATCH 09/14] Added numa preference --- harmony_model_checker/charm/charm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 645d21a6..6c6e0ed0 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -4,6 +4,7 @@ #ifdef __linux__ #include //cpu_set_t, CPU_SET +#include #endif #include @@ -2982,6 +2983,10 @@ static void usage(char *prog){ } int main(int argc, char **argv){ +#ifdef NUMA + numa_available(); + numa_set_preferred(0); +#endif bool cflag = false, dflag = false, Dflag = false, Rflag = false; int i, maxtime = 300000000 /* about 10 years */; char *outfile = NULL, *dfafile = NULL, *worker_flag = NULL; From 3d69093a7139a436254002b8bc1137304e2a976a Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Thu, 10 Aug 2023 11:12:44 -0400 Subject: [PATCH 10/14] Working on numa stuff --- harmony_model_checker/charm/charm.c | 72 +++++++++++++++++++++++++++-- harmony_model_checker/charm/head.h | 2 + 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index 6c6e0ed0..d74ede1c 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -137,6 +137,57 @@ struct worker { #define ALIGNMASK 0xF #endif +#ifdef NUMA + +#define WALLOC_BSIZE 4 + +struct bounded_buffer { + void *buf[WALLOC_BSIZE]; + int occupied; + int nextin; + int nextout; + pthread_mutex_t mutex; + pthread_cond_t more; + pthread_cond_t less; +} walloc_bb; + +// This thread allocates chunks of memory and puts them in a buffer +static void walloc_producer(void *arg){ +#ifdef __linux__ + struct worker *w = arg; + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(w->vproc, &cpuset); +#endif + + pthread_mutex_lock(&walloc_bb.mutex); + for (;;) { + while (walloc_bb.occupied >= WALLOC_BSIZE) + pthread_cond_wait(&walloc_bb.less, &walloc_bb.mutex); + assert(walloc_bb.occupied < WALLOC_BSIZE); + walloc_bb.buf[walloc_bb.nextin++] = aligned_alloc(16, WALLOC_CHUNK); + walloc_bb.nextin %= WALLOC_BSIZE; + walloc_bb.occupied++; + pthread_cond_signal(&walloc_bb.more); + } + // pthread_mutex_unlock(&walloc_bb.mutex); +} + +void *walloc_consume(){ + pthread_mutex_lock(&walloc_bb.mutex); + while(walloc_bb.occupied <= 0) + pthread_cond_wait(&walloc_bb.more, &walloc_bb.mutex); + assert(walloc_bb.occupied > 0); + void *item = walloc_bb.buf[walloc_bb.nextout++]; + walloc_bb.nextout %= WALLOC_BSIZE; + walloc_bb.occupied--; + pthread_cond_signal(&walloc_bb.less); + pthread_mutex_unlock(&walloc_bb.mutex); + return item; +} + +#endif // NUMA + // Per thread one-time memory allocator (no free()) static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ struct worker *w = ctx; @@ -152,16 +203,21 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr16 + asize > w->alloc_buf16 + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr16 - w->alloc_buf16); +#ifdef NUMA + w->alloc_buf16 = walloc_consume(); + w->alloc_ptr16 = w->alloc_buf16; +#else // NUMA #ifdef ALIGNED_ALLOC w->alloc_buf16 = my_aligned_alloc(ALIGNMASK + 1, WALLOC_CHUNK); w->alloc_ptr16 = w->alloc_buf16; -#else +#else // ALIGNED_ALLOC w->alloc_buf16 = malloc(WALLOC_CHUNK); w->alloc_ptr16 = w->alloc_buf16; if (((hvalue_t) w->alloc_ptr16 & ALIGNMASK) != 0) { w->alloc_ptr16 = (char *) ((hvalue_t) (w->alloc_ptr16 + ALIGNMASK) & ~ALIGNMASK); } -#endif +#endif // ALIGNED_ALLOC +#endif // NUMA w->allocated += WALLOC_CHUNK; } result = w->alloc_ptr16; @@ -172,7 +228,11 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr + asize > w->alloc_buf + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr - w->alloc_buf); +#ifdef NUMA + w->alloc_buf = walloc_consume(); +#else w->alloc_buf = malloc(WALLOC_CHUNK); +#endif w->alloc_ptr = w->alloc_buf; w->allocated += WALLOC_CHUNK; } @@ -2983,9 +3043,11 @@ static void usage(char *prog){ } int main(int argc, char **argv){ -#ifdef NUMA +#ifdef XNUMA +#ifdef __linux__ numa_available(); numa_set_preferred(0); +#endif #endif bool cflag = false, dflag = false, Dflag = false, Rflag = false; int i, maxtime = 300000000 /* about 10 years */; @@ -3395,6 +3457,10 @@ int main(int argc, char **argv){ global->allocated = global->graph.size * sizeof(struct node *) + dict_allocated(visited) + dict_allocated(global->values); +#ifdef NUMA + thread_create(walloc_producer, &workers[0]); +#endif + // Start all but one of the workers, who'll wait on the start barrier for (unsigned int i = 1; i < global->nworkers; i++) { thread_create(worker, &workers[i]); diff --git a/harmony_model_checker/charm/head.h b/harmony_model_checker/charm/head.h index ee524a2f..06231435 100644 --- a/harmony_model_checker/charm/head.h +++ b/harmony_model_checker/charm/head.h @@ -19,3 +19,5 @@ void *my_aligned_alloc(size_t alignment, size_t size); #ifndef __STDC_NO_ATOMICS__ #define USE_ATOMIC #endif + +#define NUMA From cba3cbec0badac7724daa194faebb7851c04fe0e Mon Sep 17 00:00:00 2001 From: Robbert Van Renesse Date: Thu, 10 Aug 2023 12:12:55 -0400 Subject: [PATCH 11/14] Working on NUMA --- harmony_model_checker/charm/charm.c | 13 +++++++------ setup.py | 5 +++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index d74ede1c..aaa1e3bd 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -137,7 +137,7 @@ struct worker { #define ALIGNMASK 0xF #endif -#ifdef NUMA +#ifdef XNUMA #define WALLOC_BSIZE 4 @@ -165,7 +165,8 @@ static void walloc_producer(void *arg){ while (walloc_bb.occupied >= WALLOC_BSIZE) pthread_cond_wait(&walloc_bb.less, &walloc_bb.mutex); assert(walloc_bb.occupied < WALLOC_BSIZE); - walloc_bb.buf[walloc_bb.nextin++] = aligned_alloc(16, WALLOC_CHUNK); + // walloc_bb.buf[walloc_bb.nextin++] = aligned_alloc(16, WALLOC_CHUNK); + walloc_bb.buf[walloc_bb.nextin++] = numa_alloc_onnode(WALLOC_CHUNK, 0); walloc_bb.nextin %= WALLOC_BSIZE; walloc_bb.occupied++; pthread_cond_signal(&walloc_bb.more); @@ -203,7 +204,7 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr16 + asize > w->alloc_buf16 + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr16 - w->alloc_buf16); -#ifdef NUMA +#ifdef XNUMA w->alloc_buf16 = walloc_consume(); w->alloc_ptr16 = w->alloc_buf16; #else // NUMA @@ -228,7 +229,7 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr + asize > w->alloc_buf + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr - w->alloc_buf); -#ifdef NUMA +#ifdef XNUMA w->alloc_buf = walloc_consume(); #else w->alloc_buf = malloc(WALLOC_CHUNK); @@ -3043,9 +3044,9 @@ static void usage(char *prog){ } int main(int argc, char **argv){ -#ifdef XNUMA #ifdef __linux__ numa_available(); +#ifdef NUMA numa_set_preferred(0); #endif #endif @@ -3457,7 +3458,7 @@ int main(int argc, char **argv){ global->allocated = global->graph.size * sizeof(struct node *) + dict_allocated(visited) + dict_allocated(global->values); -#ifdef NUMA +#ifdef XNUMA thread_create(walloc_producer, &workers[0]); #endif diff --git a/setup.py b/setup.py index add68e9a..abba5243 100644 --- a/setup.py +++ b/setup.py @@ -72,7 +72,6 @@ class CompilerArgs(NamedTuple): ) ] - class BuildExtCommand(build_ext): def build_extension(self, ext) -> None: try: @@ -122,7 +121,9 @@ def get_c_extension_include_dirs(): module = Extension( f"{PROJECT_DIR_NAME}.charm", sources=get_c_extension_src(), - include_dirs=get_c_extension_include_dirs() + include_dirs=get_c_extension_include_dirs(), + extra_compile_args=[], + extra_link_args=["-lnuma"] ) setuptools.setup( From abcad50077cf0294d1d0dd8d2fdac350eabf5fc0 Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Thu, 10 Aug 2023 12:18:21 -0400 Subject: [PATCH 12/14] setup linux specific --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index abba5243..46424ac1 100644 --- a/setup.py +++ b/setup.py @@ -123,7 +123,8 @@ def get_c_extension_include_dirs(): sources=get_c_extension_src(), include_dirs=get_c_extension_include_dirs(), extra_compile_args=[], - extra_link_args=["-lnuma"] + extra_link_args= + ["-lnuma"] if sys.platform.lower().startswith("linux") else [] ) setuptools.setup( From 172c3ee4891f58e391ebf1ee0933a76ae6a3c992 Mon Sep 17 00:00:00 2001 From: Robbert Van Renesse Date: Thu, 10 Aug 2023 12:29:15 -0400 Subject: [PATCH 13/14] Working on NUMA still --- harmony_model_checker/charm/charm.c | 76 ++--------------------------- 1 file changed, 5 insertions(+), 71 deletions(-) diff --git a/harmony_model_checker/charm/charm.c b/harmony_model_checker/charm/charm.c index aaa1e3bd..6c238829 100644 --- a/harmony_model_checker/charm/charm.c +++ b/harmony_model_checker/charm/charm.c @@ -137,58 +137,6 @@ struct worker { #define ALIGNMASK 0xF #endif -#ifdef XNUMA - -#define WALLOC_BSIZE 4 - -struct bounded_buffer { - void *buf[WALLOC_BSIZE]; - int occupied; - int nextin; - int nextout; - pthread_mutex_t mutex; - pthread_cond_t more; - pthread_cond_t less; -} walloc_bb; - -// This thread allocates chunks of memory and puts them in a buffer -static void walloc_producer(void *arg){ -#ifdef __linux__ - struct worker *w = arg; - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(w->vproc, &cpuset); -#endif - - pthread_mutex_lock(&walloc_bb.mutex); - for (;;) { - while (walloc_bb.occupied >= WALLOC_BSIZE) - pthread_cond_wait(&walloc_bb.less, &walloc_bb.mutex); - assert(walloc_bb.occupied < WALLOC_BSIZE); - // walloc_bb.buf[walloc_bb.nextin++] = aligned_alloc(16, WALLOC_CHUNK); - walloc_bb.buf[walloc_bb.nextin++] = numa_alloc_onnode(WALLOC_CHUNK, 0); - walloc_bb.nextin %= WALLOC_BSIZE; - walloc_bb.occupied++; - pthread_cond_signal(&walloc_bb.more); - } - // pthread_mutex_unlock(&walloc_bb.mutex); -} - -void *walloc_consume(){ - pthread_mutex_lock(&walloc_bb.mutex); - while(walloc_bb.occupied <= 0) - pthread_cond_wait(&walloc_bb.more, &walloc_bb.mutex); - assert(walloc_bb.occupied > 0); - void *item = walloc_bb.buf[walloc_bb.nextout++]; - walloc_bb.nextout %= WALLOC_BSIZE; - walloc_bb.occupied--; - pthread_cond_signal(&walloc_bb.less); - pthread_mutex_unlock(&walloc_bb.mutex); - return item; -} - -#endif // NUMA - // Per thread one-time memory allocator (no free()) static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ struct worker *w = ctx; @@ -204,10 +152,6 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr16 + asize > w->alloc_buf16 + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr16 - w->alloc_buf16); -#ifdef XNUMA - w->alloc_buf16 = walloc_consume(); - w->alloc_ptr16 = w->alloc_buf16; -#else // NUMA #ifdef ALIGNED_ALLOC w->alloc_buf16 = my_aligned_alloc(ALIGNMASK + 1, WALLOC_CHUNK); w->alloc_ptr16 = w->alloc_buf16; @@ -218,7 +162,6 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->alloc_ptr16 = (char *) ((hvalue_t) (w->alloc_ptr16 + ALIGNMASK) & ~ALIGNMASK); } #endif // ALIGNED_ALLOC -#endif // NUMA w->allocated += WALLOC_CHUNK; } result = w->alloc_ptr16; @@ -229,11 +172,7 @@ static void *walloc(void *ctx, unsigned int size, bool zero, bool align16){ w->align_waste += asize - size; if (w->alloc_ptr + asize > w->alloc_buf + WALLOC_CHUNK) { w->frag_waste += WALLOC_CHUNK - (w->alloc_ptr - w->alloc_buf); -#ifdef XNUMA - w->alloc_buf = walloc_consume(); -#else w->alloc_buf = malloc(WALLOC_CHUNK); -#endif w->alloc_ptr = w->alloc_buf; w->allocated += WALLOC_CHUNK; } @@ -3044,12 +2983,6 @@ static void usage(char *prog){ } int main(int argc, char **argv){ -#ifdef __linux__ - numa_available(); -#ifdef NUMA - numa_set_preferred(0); -#endif -#endif bool cflag = false, dflag = false, Dflag = false, Rflag = false; int i, maxtime = 300000000 /* about 10 years */; char *outfile = NULL, *dfafile = NULL, *worker_flag = NULL; @@ -3438,6 +3371,11 @@ int main(int argc, char **argv){ unsigned int worker_index = 0; vproc_tree_alloc(vproc_root, workers, &worker_index, global->nworkers); +#ifdef __linux__ + numa_available(); + numa_set_preferred(vproc_info[workers[0].vproc].ids[0]); +#endif + // Put the state and value dictionaries in concurrent mode dict_set_concurrent(global->values); dict_set_concurrent(visited); @@ -3458,10 +3396,6 @@ int main(int argc, char **argv){ global->allocated = global->graph.size * sizeof(struct node *) + dict_allocated(visited) + dict_allocated(global->values); -#ifdef XNUMA - thread_create(walloc_producer, &workers[0]); -#endif - // Start all but one of the workers, who'll wait on the start barrier for (unsigned int i = 1; i < global->nworkers; i++) { thread_create(worker, &workers[i]); From c3c413a91a3b51ede1274426f62806f102efe90f Mon Sep 17 00:00:00 2001 From: Robbert van Renesse Date: Thu, 10 Aug 2023 12:31:05 -0400 Subject: [PATCH 14/14] Got rid of unnecessary define --- harmony_model_checker/charm/head.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/harmony_model_checker/charm/head.h b/harmony_model_checker/charm/head.h index 06231435..ee524a2f 100644 --- a/harmony_model_checker/charm/head.h +++ b/harmony_model_checker/charm/head.h @@ -19,5 +19,3 @@ void *my_aligned_alloc(size_t alignment, size_t size); #ifndef __STDC_NO_ATOMICS__ #define USE_ATOMIC #endif - -#define NUMA