Skip to content

Commit

Permalink
triple fix
Browse files Browse the repository at this point in the history
  • Loading branch information
wsmoses committed Jan 16, 2024
1 parent 2947b9f commit 5fb208d
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 45 deletions.
16 changes: 8 additions & 8 deletions enzyme/test/Integration/Sparse/cloth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,28 +209,28 @@ static void err_store(T val, unsigned long long offset, size_t i) {

template<typename T>
__attribute__((always_inline))
static T zero_load(unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, float>> &hess) {
static T zero_load(unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
return T(0);
}


__attribute__((enzyme_sparse_accumulate))
void inner_store(size_t offset, size_t i, float val, std::vector<std::tuple<size_t, size_t, float>> &hess) {
hess.push_back(std::tuple<size_t, size_t, float>(offset, i, val));
void inner_store(size_t offset, size_t i, float val, std::vector<Triple<float>> &hess) {
hess.push_back(Triple<float>(offset, i, val));
}


template<typename T>
__attribute__((always_inline))
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, T>> &hess) {
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
if (val == 0.0) return;
offset /= sizeof(T);
inner_store(offset, i, val, hess);
}

template<typename T>
__attribute__((noinline))
std::vector<std::tuple<size_t, size_t, T>> hessian(
std::vector<Triple<T>> hessian(
const T *__restrict__ pos0,
const int *__restrict__ edges,
const int num_edges,
Expand All @@ -244,7 +244,7 @@ std::vector<std::tuple<size_t, size_t, T>> hessian(
const T *__restrict__ pos,
const size_t num_verts)
{
std::vector<std::tuple<size_t, size_t, T>> hess;
std::vector<Triple<T>> hess;
__builtin_assume(num_verts != 0);
for (size_t i=0; i<3*num_verts; i++)
__enzyme_fwddiff<void>((void *)gradient_ip<T>,
Expand Down Expand Up @@ -309,8 +309,8 @@ int main() {
const size_t num_verts = 4;
auto hess_verts = hessian(pos0, edges, num_edges, faces, num_faces, flaps, num_flaps, edge_coefficient, face_coefficient, bending_stiffness, pos, num_verts);

for (auto hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", std::get<0>(hess), std::get<1>(hess), std::get<2>(hess));
for (auto &hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", hess.row, hess.col, hess.val);
}

return 0;
Expand Down
55 changes: 28 additions & 27 deletions enzyme/test/Integration/Sparse/eigen_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

#include <vector>
#include <assert.h>
#include <tuple>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
Expand All @@ -18,67 +17,68 @@

#include "matrix.h"


template<typename T>
__attribute__((always_inline))
static float eigenstuffM(const float *__restrict__ x, size_t n, const int *__restrict__ faces, const float *__restrict__ pos0) {
float sum = 0;
static T eigenstuffM(const T *__restrict__ x, size_t n, const int *__restrict__ faces, const T *__restrict__ pos0) {
T sum = 0;
__builtin_assume(n != 0);
for (size_t idx=0; idx<n; idx++) {
int i = faces[3*idx];
int j = faces[3*idx+1];
int k = faces[3*idx+2];
float tri_area = area(&pos0[3*i], &pos0[3*j], &pos0[3*k]);
T tri_area = area(&pos0[3*i], &pos0[3*j], &pos0[3*k]);
sum += (x[i] * x[i] + x[j] * x[j] + x[k] * x[k]) * (1.0 / 3.0) * tri_area; // barycentric mass lumping
}
return sum;
}


// Calculate total energy for all faces in 3D
template<typename T>
__attribute__((always_inline))
static float eigenstuffL(const float *__restrict__ x, size_t num_faces, const int *__restrict__ faces, const float *__restrict__ verts) {
float sum = 0;
static T eigenstuffL(const T *__restrict__ x, size_t num_faces, const int *__restrict__ faces, const T *__restrict__ verts) {
T sum = 0;
__builtin_assume(num_faces != 0);
for (size_t idx=0; idx<num_faces; idx++) {
int i = faces[3*idx];
int j = faces[3*idx+1];
int k = faces[3*idx+2];

float X[2][3] = {
T X[2][3] = {
{ verts[3*j+0] - verts[3*i+0],
verts[3*j+1] - verts[3*i+1],
verts[3*j+2] - verts[3*i+2]},
{verts[3*k+0] - verts[3*i+0], verts[3*k+1] - verts[3*i+1], verts[3*k+2] - verts[3*i+2]}
};

float pInvX[3][2];
T pInvX[3][2];
pseudo_inverse(pInvX, X);

float diffs[] = {x[j] - x[i], x[k] - x[i]};
T diffs[] = {x[j] - x[i], x[k] - x[i]};

float g[3];
T g[3];
#pragma clang loop unroll(full)
for (int i = 0; i < 3; ++i) {
float sum = 0.0f;
T sum = 0.0f;
#pragma clang loop unroll(full)
for (int j = 0; j < 2; ++j) {
sum += pInvX[i][j] * diffs[j];
}
g[i] = sum;
}

sum += dot_product<float, 3>(g, g) * area(&verts[3*i], &verts[3*j], &verts[3*k]);
sum += dot_product<T, 3>(g, g) * area(&verts[3*i], &verts[3*j], &verts[3*k]);
}

return sum;
}



template<typename T>
__attribute__((always_inline))
static void gradient_ip(const float *__restrict__ x, const size_t num_faces, const int* faces, const float *__restrict__ pos, float *__restrict__ out)
static void gradient_ip(const T *__restrict__ x, const size_t num_faces, const int* faces, const T *__restrict__ pos, T *__restrict__ out)
{
__enzyme_autodiff<void>((void *)eigenstuffM,
__enzyme_autodiff<void>((void *)eigenstuffM<T>,
enzyme_const, x,
enzyme_const, num_faces,
enzyme_const, faces,
Expand All @@ -102,37 +102,38 @@ static void err_store(T val, unsigned long long offset, size_t i) {

template<typename T>
__attribute__((always_inline))
static T zero_load(unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, float>> &hess) {
static T zero_load(unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
return T(0);
}


__attribute__((enzyme_sparse_accumulate))
void inner_store(size_t offset, size_t i, float val, std::vector<std::tuple<size_t, size_t, float>> &hess) {
hess.push_back(std::tuple<size_t, size_t, float>(offset, i, val));
void inner_store(size_t offset, size_t i, float val, std::vector<Triple<float>> &hess) {
hess.push_back(Triple<float>(offset, i, val));
}

template<typename T>
__attribute__((always_inline))
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, T>> &hess) {
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
if (val == 0.0) return;
offset /= sizeof(T);
inner_store(offset, i, val, hess);
}

template<typename T>
__attribute__((noinline))
std::vector<std::tuple<size_t, size_t, float>> hessian(const float* x, size_t num_faces, const int* faces, const float* pos, size_t num_verts)
std::vector<Triple<T>> hessian(const T* x, size_t num_faces, const int* faces, const T* pos, size_t num_verts)
{

std::vector<std::tuple<size_t, size_t, float>> hess;
std::vector<Triple<T>> hess;
__builtin_assume(num_verts != 0);
for (size_t i=0; i<3*num_verts; i++)
__enzyme_fwddiff<void>((void *)gradient_ip,
__enzyme_fwddiff<void>((void *)gradient_ip<T>,
enzyme_const, x,
enzyme_const, num_faces,
enzyme_const, faces,
enzyme_dup, pos, __enzyme_todense<float*>(ident_load<float>, err_store<float>, i),
enzyme_dupnoneed, nullptr, __enzyme_todense<float*>(zero_load<float>, csr_store<float>, i, &hess));
enzyme_dup, pos, __enzyme_todense<T*>(ident_load<T>, err_store<T>, i),
enzyme_dupnoneed, nullptr, __enzyme_todense<T*>(zero_load<T>, csr_store<T>, i, &hess));
return hess;
}

Expand Down Expand Up @@ -168,8 +169,8 @@ int main() {

auto hess_verts = hessian(x, num_faces, faces, verts, num_verts);

for (auto hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", std::get<0>(hess), std::get<1>(hess), std::get<2>(hess));
for (auto &hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", hess.row, hess.col, hess.val);
}

return 0;
Expand Down
20 changes: 10 additions & 10 deletions enzyme/test/Integration/Sparse/elastic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,31 +124,31 @@ static void err_store(T val, unsigned long long offset, size_t i) {

template<typename T>
__attribute__((always_inline))
static T zero_load(unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, T>> &hess) {
static T zero_load(unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
return T(0);
}

__attribute__((enzyme_sparse_accumulate))
void inner_store(size_t offset, size_t i, float val, std::vector<std::tuple<size_t, size_t, float>> &hess) {
hess.push_back(std::tuple<size_t, size_t, float>(offset, i, val));
void inner_store(size_t offset, size_t i, float val, std::vector<Triple<float>> &hess) {
hess.push_back(Triple<float>(offset, i, val));
}

__attribute__((enzyme_sparse_accumulate))
void inner_store(size_t offset, size_t i, double val, std::vector<std::tuple<size_t, size_t, double>> &hess) {
hess.push_back(std::tuple<size_t, size_t, double>(offset, i, val));
void inner_store(size_t offset, size_t i, double val, std::vector<Triple<double>> &hess) {
hess.push_back(Triple<double>(offset, i, val));
}

template<typename T>
__attribute__((always_inline))
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<std::tuple<size_t, size_t, T>> &hess) {
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
if (val == 0.0) return;
offset /= sizeof(T);
inner_store(offset, i, val, hess);
}

template<typename T>
__attribute__((noinline))
std::vector<std::tuple<size_t, size_t, T>> hessian(
std::vector<Triple<T>> hessian(
const T *__restrict__ pos0,
const int tets[][4],
const size_t n,
Expand All @@ -157,7 +157,7 @@ std::vector<std::tuple<size_t, size_t, T>> hessian(
const T *__restrict__ pos,
const size_t num_tets)
{
std::vector<std::tuple<size_t, size_t, T>> hess;
std::vector<Triple<T>> hess;
__builtin_assume(num_tets != 0);
for (size_t i=0; i<4*num_tets; i++)
__enzyme_fwddiff<void>((void *)gradient_ip<T>,
Expand Down Expand Up @@ -208,8 +208,8 @@ int main() {
const size_t num_tets = 1;
auto hess_verts = hessian(pos0, tets, n, youngs_modulus, poisson_ratio, pos, num_tets);

for (auto hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", std::get<0>(hess), std::get<1>(hess), std::get<2>(hess));
for (auto &hess : hess_verts) {
printf("i=%lu, j=%lu, val=%f", hess.row, hess.col, hess.val);
}

return 0;
Expand Down
10 changes: 10 additions & 0 deletions enzyme/test/Integration/Sparse/matrix.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
#include <cmath>
#include <stdio.h>

template<typename T>
struct Triple {
size_t row;
size_t col;
T val;
Triple(Triple&&) = default;
Triple(size_t row, size_t col, T val) : row(row), col(col), val(val) {}
};

extern int enzyme_width;
extern int enzyme_dup;
Expand Down

0 comments on commit 5fb208d

Please sign in to comment.