diff --git a/src/skelcast/data/human36m/human36m.py b/src/skelcast/data/human36m/human36m.py
index 00578f8..1613d2b 100644
--- a/src/skelcast/data/human36m/human36m.py
+++ b/src/skelcast/data/human36m/human36m.py
@@ -16,28 +16,29 @@ def __init__(self, path, skeleton, fps):
         self._fps = fps
         self._use_gpu = False
         self._skeleton = skeleton
-        
+
     def cuda(self):
         self._use_gpu = True
         self._skeleton.cuda()
         return self
-        
+
     def _load(self, path):
         result = {}
-        data = np.load(path, 'r')
-        for i, (trajectory, rotations, subject, action) in enumerate(zip(data['trajectories'],
-                                                                         data['rotations'],
-                                                                         data['subjects'],
-                                                                         data['actions'])):
+        data = np.load(path, allow_pickle=True)  # NOTE(review): allow_pickle=True unpickles object arrays and can execute arbitrary code; only load trusted files
+        for i, (trajectory, rotations, subject, action) in enumerate(
+            zip(
+                data["positions"],  # NOTE(review): bound to `trajectory` and stored under "trajectory" below; confirm this array is the root trajectory
+                data["rotations"],
+                data["subjects"],
+                data["actions"],
+            )
+        ):
             if subject not in result:
                 result[subject] = {}
-            
-            result[subject][action] = {
-                'rotations': rotations,
-                'trajectory': trajectory
-            }
+
+            result[subject][action] = {"rotations": rotations, "trajectory": trajectory}  # one entry per (subject, action); a repeated pair overwrites the earlier one
         return result
-        
+
     def downsample(self, factor, keep_strides=True):
         """
         Downsample this dataset by an integer factor, keeping all strides of the data
@@ -47,7 +48,7 @@ def downsample(self, factor, keep_strides=True):
         will have '_d0', ... '_dn' appended to their names.
         """
         assert self._fps % factor == 0
-        
+
         for subject in self._data.keys():
             new_actions = {}
             for action in list(self._data[subject].keys()):
@@ -55,32 +56,32 @@ def downsample(self, factor, keep_strides=True):
                     tup = {}
                     for k in self._data[subject][action].keys():
                         tup[k] = self._data[subject][action][k][idx::factor]
-                    new_actions[action + '_d' + str(idx)] = tup
+                    new_actions[action + "_d" + str(idx)] = tup
                     if not keep_strides:
                         break
             self._data[subject] = new_actions
-            
+
         self._fps //= factor
-        
+
     def _mirror_sequence(self, sequence):
-        mirrored_rotations = sequence['rotations'].copy()
-        mirrored_trajectory = sequence['trajectory'].copy()
-        
+        mirrored_rotations = sequence["rotations"].copy()
+        mirrored_trajectory = sequence["trajectory"].copy()
+
         joints_left = self._skeleton.joints_left()
         joints_right = self._skeleton.joints_right()
-        
+
         # Flip left/right joints
-        mirrored_rotations[:, joints_left] = sequence['rotations'][:, joints_right]
-        mirrored_rotations[:, joints_right] = sequence['rotations'][:, joints_left]
-        
+        mirrored_rotations[:, joints_left] = sequence["rotations"][:, joints_right]
+        mirrored_rotations[:, joints_right] = sequence["rotations"][:, joints_left]
+
         mirrored_rotations[:, :, [2, 3]] *= -1
         mirrored_trajectory[:, 0] *= -1
 
         return {
-            'rotations': qfix(mirrored_rotations),
-            'trajectory': mirrored_trajectory
+            "rotations": qfix(mirrored_rotations),
+            "trajectory": mirrored_trajectory,
         }
-    
+
     def mirror(self):
         """
         Perform data augmentation by mirroring every sequence in the dataset.
@@ -88,54 +89,66 @@ def mirror(self):
         """
         for subject in self._data.keys():
             for action in list(self._data[subject].keys()):
-                if '_m' in action:
+                if "_m" in action:
                     continue
-                self._data[subject][action + '_m'] = self._mirror_sequence(self._data[subject][action])
-                
+                self._data[subject][action + "_m"] = self._mirror_sequence(
+                    self._data[subject][action]
+                )
+
     def compute_euler_angles(self, order):
         for subject in self._data.values():
             for action in subject.values():
-                action['rotations_euler'] = qeuler_np(action['rotations'], order, use_gpu=self._use_gpu)
-                
+                action["rotations_euler"] = qeuler_np(
+                    action["rotations"], order, use_gpu=self._use_gpu
+                )
+
     def compute_positions(self):
         for subject in self._data.values():
             for action in subject.values():
-                rotations = torch.from_numpy(action['rotations'].astype('float32')).unsqueeze(0)
-                trajectory = torch.from_numpy(action['trajectory'].astype('float32')).unsqueeze(0)
+                rotations = torch.from_numpy(
+                    action["rotations"].astype("float32")
+                ).unsqueeze(0)
+                trajectory = torch.from_numpy(
+                    action["trajectory"].astype("float32")
+                ).unsqueeze(0)
                 if self._use_gpu:
                     rotations = rotations.cuda()
                     trajectory = trajectory.cuda()
-                action['positions_world'] = self._skeleton.forward_kinematics(rotations, trajectory).squeeze(0).cpu().numpy()
-                
+                action["positions_world"] = (
+                    self._skeleton.forward_kinematics(rotations, trajectory)
+                    .squeeze(0)
+                    .cpu()
+                    .numpy()
+                )
+
                 # Absolute translations across the XY plane are removed here
                 trajectory[:, :, [0, 2]] = 0
-                action['positions_local'] = self._skeleton.forward_kinematics(rotations, trajectory).squeeze(0).cpu().numpy()
-                
-        
-    def __getitem__(self, key):
-        return self._data[key]
-    
-        
+                action["positions_local"] = (
+                    self._skeleton.forward_kinematics(rotations, trajectory)
+                    .squeeze(0)
+                    .cpu()
+                    .numpy()
+                )
+
+    # def __getitem__(self, key):
+    #     return self._data[key]
+
     def subjects(self):
         return self._data.keys()
-    
-        
+
     def subject_actions(self, subject):
         return self._data[subject].keys()
-        
-        
+
     def all_actions(self):
         result = []
         for subject, actions in self._data.items():
             for action in actions.keys():
                 result.append((subject, action))
         return result
-    
-    
+
     def fps(self):
         return self._fps
-    
-    
+
     def skeleton(self):
         return self._skeleton
 
@@ -145,44 +158,89 @@ class Human36MDataset(MocapDataset):
     """
     TODO: Possibly add a flatten_data method for easy accessing with a single index.
     """
+
     def __init__(self, path, seq_len=27):
-        skeleton = Skeleton(offsets=[
-       [   0.      ,    0.      ,    0.      ],
-       [-132.948591,    0.      ,    0.      ],
-       [   0.      , -442.894612,    0.      ],
-       [   0.      , -454.206447,    0.      ],
-       [   0.      ,    0.      ,  162.767078],
-       [   0.      ,    0.      ,   74.999437],
-       [ 132.948826,    0.      ,    0.      ],
-       [   0.      , -442.894413,    0.      ],
-       [   0.      , -454.20659 ,    0.      ],
-       [   0.      ,    0.      ,  162.767426],
-       [   0.      ,    0.      ,   74.999948],
-       [   0.      ,    0.1     ,    0.      ],
-       [   0.      ,  233.383263,    0.      ],
-       [   0.      ,  257.077681,    0.      ],
-       [   0.      ,  121.134938,    0.      ],
-       [   0.      ,  115.002227,    0.      ],
-       [   0.      ,  257.077681,    0.      ],
-       [   0.      ,  151.034226,    0.      ],
-       [   0.      ,  278.882773,    0.      ],
-       [   0.      ,  251.733451,    0.      ],
-       [   0.      ,    0.      ,    0.      ],
-       [   0.      ,    0.      ,   99.999627],
-       [   0.      ,  100.000188,    0.      ],
-       [   0.      ,    0.      ,    0.      ],
-       [   0.      ,  257.077681,    0.      ],
-       [   0.      ,  151.031437,    0.      ],
-       [   0.      ,  278.892924,    0.      ],
-       [   0.      ,  251.72868 ,    0.      ],
-       [   0.      ,    0.      ,    0.      ],
-       [   0.      ,    0.      ,   99.999888],
-       [   0.      ,  137.499922,    0.      ],
-       [   0.      ,    0.      ,    0.      ]
-    ],
-    parents=[-1,  0,  1,  2,  3,  4,  0,  6,  7,  8,  9,  0, 11, 12, 13, 14, 12,
-       16, 17, 18, 19, 20, 19, 22, 12, 24, 25, 26, 27, 28, 27, 30],
-    joints_left=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31],
-    joints_right=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23])
+        skeleton = Skeleton(
+            offsets=[
+                [0.0, 0.0, 0.0],
+                [-132.948591, 0.0, 0.0],
+                [0.0, -442.894612, 0.0],
+                [0.0, -454.206447, 0.0],
+                [0.0, 0.0, 162.767078],
+                [0.0, 0.0, 74.999437],
+                [132.948826, 0.0, 0.0],
+                [0.0, -442.894413, 0.0],
+                [0.0, -454.20659, 0.0],
+                [0.0, 0.0, 162.767426],
+                [0.0, 0.0, 74.999948],
+                [0.0, 0.1, 0.0],
+                [0.0, 233.383263, 0.0],
+                [0.0, 257.077681, 0.0],
+                [0.0, 121.134938, 0.0],
+                [0.0, 115.002227, 0.0],
+                [0.0, 257.077681, 0.0],
+                [0.0, 151.034226, 0.0],
+                [0.0, 278.882773, 0.0],
+                [0.0, 251.733451, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 99.999627],
+                [0.0, 100.000188, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 257.077681, 0.0],
+                [0.0, 151.031437, 0.0],
+                [0.0, 278.892924, 0.0],
+                [0.0, 251.72868, 0.0],
+                [0.0, 0.0, 0.0],
+                [0.0, 0.0, 99.999888],
+                [0.0, 137.499922, 0.0],
+                [0.0, 0.0, 0.0],
+            ],
+            parents=[
+                -1,
+                0,
+                1,
+                2,
+                3,
+                4,
+                0,
+                6,
+                7,
+                8,
+                9,
+                0,
+                11,
+                12,
+                13,
+                14,
+                12,
+                16,
+                17,
+                18,
+                19,
+                20,
+                19,
+                22,
+                12,
+                24,
+                25,
+                26,
+                27,
+                28,
+                27,
+                30,
+            ],
+            joints_left=[1, 2, 3, 4, 5, 24, 25, 26, 27, 28, 29, 30, 31],
+            joints_right=[6, 7, 8, 9, 10, 16, 17, 18, 19, 20, 21, 22, 23],
+        )
         super().__init__(path, skeleton, fps=50)
         self.compute_positions()
+        self._dataset_flat = []
+        for subject in ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11']:
+            for action in list(self._data[subject].keys()):
+                self._dataset_flat.append(self._data[subject][action]['rotations'])
+
+    def __getitem__(self, index):  # str key -> that subject's action dict (as Skeleton.remove_joints expects); int/slice -> flat rotations
+        return self._data[index] if isinstance(index, str) else self._dataset_flat[index]
+
+    def __len__(self):  # number of rotation sequences collected into the flat list by __init__ (one per subject/action)
+        return len(self._dataset_flat)
\ No newline at end of file
diff --git a/src/skelcast/data/human36m/skeleton.py b/src/skelcast/data/human36m/skeleton.py
index ce9d532..49a1161 100644
--- a/src/skelcast/data/human36m/skeleton.py
+++ b/src/skelcast/data/human36m/skeleton.py
@@ -6,18 +6,29 @@
 #
 
 import numpy as np
+import torch
+
+from skelcast.data.human36m.quaternion import qmul_np, qmul, qrot
 
 class Skeleton:
-    def __init__(self, parents, joints_left, joints_right):
-        assert len(joints_left) == len(joints_right)
+    def __init__(self, offsets, parents, joints_left=None, joints_right=None):
+        assert len(offsets) == len(parents)
         
+        self._offsets = torch.FloatTensor(offsets)
         self._parents = np.array(parents)
         self._joints_left = joints_left
         self._joints_right = joints_right
         self._compute_metadata()
     
+    def cuda(self):  # rebinds the offsets tensor to its CUDA copy
+        self._offsets = self._offsets.cuda()
+        return self  # returning self lets callers chain the call
+    
     def num_joints(self):
-        return len(self._parents)
+        return self._offsets.shape[0]
+    
+    def offsets(self):  # per-joint offsets as the float tensor built in __init__ (a CUDA tensor after cuda())
+        return self._offsets
     
     def parents(self):
         return self._parents
@@ -28,19 +39,27 @@ def has_children(self):
     def children(self):
         return self._children
     
-    def remove_joints(self, joints_to_remove):
+    def remove_joints(self, joints_to_remove, dataset):
         """
-        Remove the joints specified in 'joints_to_remove'.
+        Remove the joints specified in 'joints_to_remove', both from the
+        skeleton definition and from the dataset (which is modified in place).
+        The rotations of removed joints are propagated along the kinematic chain.
         """
         valid_joints = []
         for joint in range(len(self._parents)):
             if joint not in joints_to_remove:
                 valid_joints.append(joint)
-
-        for i in range(len(self._parents)):
-            while self._parents[i] in joints_to_remove:
-                self._parents[i] = self._parents[self._parents[i]]
                 
+        # Update all transformations in the dataset
+        for subject in dataset.subjects():
+            for action in dataset[subject].keys():
+                rotations = dataset[subject][action]['rotations']
+                for joint in joints_to_remove:
+                    for child in self._children[joint]:
+                        rotations[:, child] = qmul_np(rotations[:, joint], rotations[:, child])
+                    rotations[:, joint] = [1, 0, 0, 0] # Identity
+                dataset[subject][action]['rotations'] = rotations[:, valid_joints]
+
         index_offsets = np.zeros(len(self._parents), dtype=int)
         new_parents = []
         for i, parent in enumerate(self._parents):
@@ -49,24 +68,41 @@ def remove_joints(self, joints_to_remove):
             else:
                 index_offsets[i:] += 1
         self._parents = np.array(new_parents)
-        
-        
-        if self._joints_left is not None:
-            new_joints_left = []
-            for joint in self._joints_left:
-                if joint in valid_joints:
-                    new_joints_left.append(joint - index_offsets[joint])
-            self._joints_left = new_joints_left
-        if self._joints_right is not None:
-            new_joints_right = []
-            for joint in self._joints_right:
-                if joint in valid_joints:
-                    new_joints_right.append(joint - index_offsets[joint])
-            self._joints_right = new_joints_right
 
+        self._offsets = self._offsets[valid_joints]
         self._compute_metadata()
         
-        return valid_joints
+    def forward_kinematics(self, rotations, root_positions):
+        """
+        Perform forward kinematics using the given trajectory and local rotations; returns joint positions, (N, L, J, 3) for the shapes below.
+        Arguments (where N = batch size, L = sequence length, J = number of joints):
+         -- rotations: (N, L, J, 4) tensor of unit quaternions describing the local rotations of each joint.
+         -- root_positions: (N, L, 3) tensor describing the root joint positions.
+        """
+        assert len(rotations.shape) == 4  # rank check only; N, L and J are not validated against the skeleton
+        assert rotations.shape[-1] == 4
+
+        positions_world = []  # one entry per joint, appended in joint-index order
+        rotations_world = []  # accumulated rotation per joint; None for joints without children
+
+        expanded_offsets = self._offsets.expand(rotations.shape[0], rotations.shape[1],
+                                                   self._offsets.shape[0], self._offsets.shape[1])  # offsets broadcast over batch and time
+
+        # Parallelize along the batch and time dimensions
+        for i in range(self._offsets.shape[0]):  # both lists are indexed by parent id, so every parent must precede its children
+            if self._parents[i] == -1:
+                positions_world.append(root_positions)
+                rotations_world.append(rotations[:, :, 0])  # NOTE(review): always reads joint 0 here, not joint i; confirm the root is index 0
+            else:
+                positions_world.append(qrot(rotations_world[self._parents[i]], expanded_offsets[:, :, i]) \
+                                       + positions_world[self._parents[i]])  # qrot presumably rotates the offset by the parent's accumulated rotation; verify in quaternion.py
+                if self._has_children[i]:
+                    rotations_world.append(qmul(rotations_world[self._parents[i]], rotations[:, :, i]))
+                else:
+                    # This joint is a terminal node -> it would be useless to compute the transformation
+                    rotations_world.append(None)
+
+        return torch.stack(positions_world, dim=3).permute(0, 1, 3, 2)  # stack puts joints last, permute swaps them ahead of xyz
     
     def joints_left(self):
         return self._joints_left