Skip to content

Commit

Permalink
Instrumentation for delayed UB stemming from uninitialized memory (#3374
Browse files Browse the repository at this point in the history
)

As #3324 mentioned, delayed UB triggered by accessing uninitialized
memory was previously mitigated by injecting `assert!(false)` for every
possible source of it; this PR adds all the necessary infrastructure and
minimal support for detecting it properly.

The process of detecting and instrumenting places is as follows:
- Find all sources of delayed uninitialized memory UB (mutable pointer
casts with different pointee padding and copy intrinsics);
- Compute conservative aliasing graph between all memory places
reachable from each harness;
- Instrument all places pointed to by each source we found in step 1.

Not all language constructs are currently supported -- see the tracking
issue (#3300) for the full list of limitations.

Resolves #3324

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 and MIT licenses.

---------

Co-authored-by: Celina G. Val <celinval@amazon.com>
  • Loading branch information
artemagvanian and celinval committed Aug 5, 2024
1 parent 5424bc5 commit f2831f4
Show file tree
Hide file tree
Showing 23 changed files with 2,690 additions and 423 deletions.
18 changes: 16 additions & 2 deletions kani-compiler/src/codegen_cprover_gotoc/compiler_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ impl GotocCodegenBackend {
check_contract: Option<InternalDefId>,
mut transformer: BodyTransformation,
) -> (GotocCtx<'tcx>, Vec<MonoItem>, Option<AssignsContract>) {
// This runs reachability analysis before global passes are applied.
//
// Alternatively, we could run reachability only once after the global passes are applied
// and resolve the necessary dependencies inside the passes on the fly. This, however, has a
// disadvantage of not having a precomputed call graph for the global passes to use. The
// call graph could be used, for example, in resolving function pointer or vtable calls for
// global passes that need this.
let (items, call_graph) = with_timer(
|| collect_reachable_items(tcx, &mut transformer, starting_items),
"codegen reachability analysis",
Expand Down Expand Up @@ -115,6 +122,13 @@ impl GotocCodegenBackend {
call_graph,
);

// Re-collect reachable items after global transformations were applied. This is necessary
// since global pass could add extra calls to instrumentation.
let (items, _) = with_timer(
|| collect_reachable_items(tcx, &mut transformer, starting_items),
"codegen reachability analysis (second pass)",
);

// Follow rustc naming convention (cx is abbrev for context).
// https://rustc-dev-guide.rust-lang.org/conventions.html#naming-conventions
let mut gcx =
Expand Down Expand Up @@ -260,8 +274,8 @@ impl CodegenBackend for GotocCodegenBackend {
for unit in units.iter() {
// We reset the body cache for now because each codegen unit has different
// configurations that affect how we transform the instance body.
let mut transformer = BodyTransformation::new(&queries, tcx, &unit);
for harness in &unit.harnesses {
let transformer = BodyTransformation::new(&queries, tcx, &unit);
let model_path = units.harness_model_path(*harness).unwrap();
let contract_metadata =
contract_metadata_for_harness(tcx, harness.def.def_id());
Expand All @@ -273,7 +287,7 @@ impl CodegenBackend for GotocCodegenBackend {
contract_metadata,
transformer,
);
transformer = results.extend(gcx, items, None);
results.extend(gcx, items, None);
if let Some(assigns_contract) = contract_info {
modifies_instances.push((*harness, assigns_contract));
}
Expand Down
1 change: 1 addition & 0 deletions kani-compiler/src/kani_middle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub mod codegen_units;
pub mod coercion;
mod intrinsics;
pub mod metadata;
pub mod points_to;
pub mod provide;
pub mod reachability;
pub mod resolve;
Expand Down
11 changes: 11 additions & 0 deletions kani-compiler/src/kani_middle/points_to/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! This module contains points-to analysis primitives, such as the graph and types representing its
//! nodes, and the analysis itself.

mod points_to_analysis;
mod points_to_graph;

pub use points_to_analysis::run_points_to_analysis;
pub use points_to_graph::{MemLoc, PointsToGraph};
654 changes: 654 additions & 0 deletions kani-compiler/src/kani_middle/points_to/points_to_analysis.rs

Large diffs are not rendered by default.

222 changes: 222 additions & 0 deletions kani-compiler/src/kani_middle/points_to/points_to_graph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Graph data structure to store the results of points-to analysis.

use rustc_hir::def_id::DefId;
use rustc_middle::{
mir::{Location, Place, ProjectionElem},
ty::{Instance, List, TyCtxt},
};
use rustc_mir_dataflow::{fmt::DebugWithContext, JoinSemiLattice};
use rustc_smir::rustc_internal;
use stable_mir::mir::{
mono::{Instance as StableInstance, StaticDef},
Place as StablePlace,
};
use std::collections::{HashMap, HashSet, VecDeque};

/// A node in the points-to graph, which could be a place on the stack, a heap allocation, or a static.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum MemLoc<'tcx> {
/// Notice that the type of `Place` here is not restricted to references or pointers. For
/// example, we propagate aliasing information for values derived from casting a pointer to a
/// usize in order to ensure soundness, as it could later be casted back to a pointer.
Stack(Instance<'tcx>, Place<'tcx>),
/// Using a combination of the instance of the function where the allocation took place and the
/// location of the allocation inside this function implements allocation-site abstraction.
Heap(Instance<'tcx>, Location),
Static(DefId),
}

impl<'tcx> MemLoc<'tcx> {
/// Create a memory location representing a new heap allocation site.
pub fn new_heap_allocation(instance: Instance<'tcx>, location: Location) -> Self {
MemLoc::Heap(instance, location)
}

/// Create a memory location representing a new stack allocation.
pub fn new_stack_allocation(instance: Instance<'tcx>, place: Place<'tcx>) -> Self {
MemLoc::Stack(instance, place)
}

/// Create a memory location representing a new static allocation.
pub fn new_static_allocation(static_def: DefId) -> Self {
MemLoc::Static(static_def)
}

/// Create a memory location representing a new stack allocation from StableMIR values.
pub fn from_stable_stack_allocation(
instance: StableInstance,
place: StablePlace,
tcx: TyCtxt<'tcx>,
) -> Self {
let internal_instance = rustc_internal::internal(tcx, instance);
let internal_place = rustc_internal::internal(tcx, place);
Self::new_stack_allocation(internal_instance, internal_place)
}

/// Create a memory location representing a new static allocation from StableMIR values.
pub fn from_stable_static_allocation(static_def: StaticDef, tcx: TyCtxt<'tcx>) -> Self {
let static_def_id = rustc_internal::internal(tcx, static_def);
Self::new_static_allocation(static_def_id)
}
}

/// Graph data structure that stores the current results of the point-to analysis. The graph is
/// directed, so having an edge between two places means that one is pointing to the other.
///
/// For example:
/// - `a = &b` would translate to `a --> b`
/// - `a = b` would translate to `a --> {all pointees of b}` (if `a` and `b` are pointers /
/// references)
///
/// Note that the aliasing is not field-sensitive, since the nodes in the graph are places with no
/// projections, which is sound but can be imprecise.
///
/// For example:
/// ```
/// let ref_pair = (&a, &b); // Will add `ref_pair --> (a | b)` edges into the graph.
/// let first = ref_pair.0; // Will add `first -> (a | b)`, which is an overapproximation.
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PointsToGraph<'tcx> {
/// A hash map of node --> {nodes} edges.
edges: HashMap<MemLoc<'tcx>, HashSet<MemLoc<'tcx>>>,
}

impl<'tcx> PointsToGraph<'tcx> {
pub fn empty() -> Self {
Self { edges: HashMap::new() }
}

/// Collect all nodes which have incoming edges from `nodes`.
pub fn successors(&self, nodes: &HashSet<MemLoc<'tcx>>) -> HashSet<MemLoc<'tcx>> {
nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect()
}

/// For each node in `from`, add an edge to each node in `to`.
pub fn extend(&mut self, from: &HashSet<MemLoc<'tcx>>, to: &HashSet<MemLoc<'tcx>>) {
for node in from.iter() {
let node_pointees = self.edges.entry(*node).or_default();
node_pointees.extend(to.iter());
}
}

/// Collect all places to which a given place can alias.
///
/// We automatically resolve dereference projections here (by finding successors for each
/// dereference projection we encounter), which is valid as long as we do it for every place we
/// add to the graph.
pub fn resolve_place(
&self,
place: Place<'tcx>,
instance: Instance<'tcx>,
) -> HashSet<MemLoc<'tcx>> {
let place_without_projections = Place { local: place.local, projection: List::empty() };
let mut node_set =
HashSet::from([MemLoc::new_stack_allocation(instance, place_without_projections)]);
for projection in place.projection {
match projection {
ProjectionElem::Deref => {
node_set = self.successors(&node_set);
}
ProjectionElem::Field(..)
| ProjectionElem::Index(..)
| ProjectionElem::ConstantIndex { .. }
| ProjectionElem::Subslice { .. }
| ProjectionElem::Downcast(..)
| ProjectionElem::OpaqueCast(..)
| ProjectionElem::Subtype(..) => {
/* There operations are no-ops w.r.t aliasing since we are tracking it on per-object basis. */
}
}
}
node_set
}

/// Stable interface for `resolve_place`.
pub fn resolve_place_stable(
&self,
place: StablePlace,
instance: StableInstance,
tcx: TyCtxt<'tcx>,
) -> HashSet<MemLoc<'tcx>> {
let internal_place = rustc_internal::internal(tcx, place);
let internal_instance = rustc_internal::internal(tcx, instance);
self.resolve_place(internal_place, internal_instance)
}

/// Dump the graph into a file using the graphviz format for later visualization.
pub fn dump(&self, file_path: &str) {
let mut nodes: Vec<String> =
self.edges.keys().map(|from| format!("\t\"{:?}\"", from)).collect();
nodes.sort();
let nodes_str = nodes.join("\n");

let mut edges: Vec<String> = self
.edges
.iter()
.flat_map(|(from, to)| {
let from = format!("\"{:?}\"", from);
to.iter().map(move |to| {
let to = format!("\"{:?}\"", to);
format!("\t{} -> {}", from.clone(), to)
})
})
.collect();
edges.sort();
let edges_str = edges.join("\n");

std::fs::write(file_path, format!("digraph {{\n{}\n{}\n}}", nodes_str, edges_str)).unwrap();
}

/// Find a transitive closure of the graph starting from a set of given locations; this also
/// includes statics.
pub fn transitive_closure(&self, targets: HashSet<MemLoc<'tcx>>) -> PointsToGraph<'tcx> {
let mut result = PointsToGraph::empty();
// Working queue.
let mut queue = VecDeque::from_iter(targets);
// Add all statics, as they can be accessed at any point.
let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Static(_)));
queue.extend(statics);
// Add all entries.
while let Some(next_target) = queue.pop_front() {
result.edges.entry(next_target).or_insert_with(|| {
let outgoing_edges =
self.edges.get(&next_target).cloned().unwrap_or(HashSet::new());
queue.extend(outgoing_edges.iter());
outgoing_edges.clone()
});
}
result
}

/// Retrieve all places to which a given place is pointing to.
pub fn pointees_of(&self, target: &MemLoc<'tcx>) -> HashSet<MemLoc<'tcx>> {
self.edges.get(&target).unwrap_or(&HashSet::new()).clone()
}
}

/// Since we are performing the analysis using a dataflow, we need to implement a proper monotonous
/// join operation. In our case, this is a simple union of two graphs. This "lattice" is finite,
/// because in the worst case all places will alias to all places, in which case the join will be a
/// no-op.
impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> {
fn join(&mut self, other: &Self) -> bool {
let mut updated = false;
// Check every node in the other graph.
for (from, to) in other.edges.iter() {
let existing_to = self.edges.entry(*from).or_default();
let initial_size = existing_to.len();
existing_to.extend(to);
let new_size = existing_to.len();
updated |= initial_size != new_size;
}
updated
}
}

/// This is a requirement for the fixpoint solver, and there is no derive macro for this, so
/// implement it manually.
impl<'tcx, C> DebugWithContext<C> for PointsToGraph<'tcx> {}
Loading

0 comments on commit f2831f4

Please sign in to comment.