# Simple example using only the compute topology.
# The pagemap tool can be found here: https://forge.imag.fr/projects/pagemap/
# Note that you will most likely need root privileges to use pagemap.
require 'hwloc'
require 'ffi'
# Helper that prints where the memory behind +ptr+ is physically located.
#
# If the topology object responds to +get_area_memlocation+, every page
# touched by the area is queried through it and the result is printed with
# +p+. Otherwise (older hwloc versions) the external +pagemap+ tool is run on
# the address range and its output is printed.
#
# Relies on the global +$page_size+ holding the page size in bytes.
#
# @param ptr [#address, #size] pointer describing the memory area
# @param t topology object used to query the location
def print_pointer_location(ptr, t)
  if t.respond_to?(:get_area_memlocation)
    # The area does not necessarily start on a page boundary. Pages are
    # counted from the aligned base address, so the offset of the area inside
    # its first page must be added to the size; otherwise the last, partially
    # covered page would be skipped.
    offset = ptr.address % $page_size
    base_address = ptr.address - offset
    page_count = ptr.size.zero? ? 0 : (offset + ptr.size + $page_size - 1) / $page_size
    pages = Array.new(page_count) do |i|
      FFI::Pointer.new(base_address + i * $page_size).slice(0, $page_size)
    end
    pages.each do |page|
      p t.get_area_memlocation(page, :MEMBIND_BYNODESET)
    end
  else
    # Build the command once: it is echoed first, then executed.
    first_byte = ptr.address.to_s(16)
    last_byte = (ptr.address + ptr.size - 1).to_s(16)
    command = "pagemap #{Process.pid} -n #{first_byte}-#{last_byte}"
    puts command
    puts `#{command}`
  end
end
# Create the topology and request instruction caches before loading it.
# API versions before 2.0 use a topology flag for this; 2.0 and later use a
# type filter instead.
t = Hwloc::Topology.new
if Hwloc::API_VERSION >= Hwloc::API_VERSION_2_0
  t.set_icache_types_filter(:TYPE_FILTER_KEEP_ALL)
else
  t.flags = Hwloc::Topology::FLAG_ICACHES
end
t.load
# Page size taken from the first page type of the first machine object;
# print_pointer_location reads it through this global.
first_machine = t.machines.first
$page_size = first_machine.memory.page_types.first.size
# Print the infos attached to the root object of the topology.
root = t.root_obj
puts root.infos
# Depth-first traversal: print every object of the topology.
t.each do |obj|
  puts obj
end
# Breadth-first traversal: print the objects one depth level per line.
t.depth.times do |level|
  objects_at_level = t.each_by_depth(level).to_a
  puts objects_at_level.join(", ")
end
# Find the cache levels of the machine and their sizes: walk up from the
# first core for as long as the ancestors are caches.
first_core = t.cores.first
caches = first_core.ancestors.take_while(&:is_a_cache?)
caches.each do |cache|
  size_kib = cache.attr.size / 1024
  puts "#{cache.type_name}: #{size_kib}KiB"
end
# Migrate the execution to three randomly picked PUs, one after the other.
chosen_pus = t.pus.shuffle.first(3)
chosen_pus.each do |pu|
  t.set_cpubind(pu.cpuset)
  puts "Processing on #{pu} #P#{pu.os_index}"
  # Busy loop so that some work is actually performed while bound to this PU.
  counter = 0
  (1 << 26).times { counter += 1 }
end
# Allocate memory on the different NUMA nodes using hwloc (if there are any):
# one cleared 10*4*1024-byte buffer per node, bound to that node's cpuset.
if t.numanodes.length > 0
  ptrs = t.numanodes.map do |node|
    buffer = t.alloc_membind(10 * 4 * 1024, node.cpuset, :MEMBIND_BIND)
    # clear is the last expression, so its return value is what gets collected.
    buffer.clear
  end
  sleep 1
  # Print the binding and the page locations of each buffer.
  ptrs.each do |buffer|
    p t.get_area_membind(buffer)
    print_pointer_location(buffer, t)
    puts
  end
end
# Migrate memory using hwloc: one FFI-allocated buffer per NUMA node, each
# bound to that node's cpuset with migration requested. Alignment is not
# under our control, so the last page of an allocation can be migrated twice
# because it overlaps two memory areas.
if t.numanodes.length > 0
  ptrs = t.numanodes.map do |node|
    buffer = FFI::MemoryPointer.new(10 * 4 * 1024)
    t.set_area_membind(buffer, node.cpuset, :MEMBIND_BIND, :MEMBIND_MIGRATE)
    # clear is the last expression, so its return value is what gets collected.
    buffer.clear
  end
  sleep 1
  # Print the binding and the page locations of each buffer.
  ptrs.each do |buffer|
    p t.get_area_membind(buffer)
    print_pointer_location(buffer, t)
    puts
  end
end
# Allocate a buffer and migrate it in an interleaved way, using the cpuset of
# the first machine object, then print its binding and page locations.
buffer_size = 10 * 4 * 1024
ptr = FFI::MemoryPointer.new(buffer_size)
machine_cpuset = t.machines.first.cpuset
t.set_area_membind(ptr, machine_cpuset, :MEMBIND_INTERLEAVE, :MEMBIND_MIGRATE)
p t.get_area_membind(ptr)
print_pointer_location(ptr, t)