Matrix 4x4 Multiply with Vector (floats)

(Please see Matrix 4x4 addition/subtraction (floats) for the typedefs and definitions used.)

// Transform a vector by a 4x4 float matrix using AltiVec.
//
// Computes, lane by lane:
//
//     vout = m1 * vin[0] + m2 * vin[1] + m3 * vin[2] + m4
//
// where m1..m4 are the four vector registers that LOAD_ALIGNED_MATRIX
// fills from mat. Only the first three elements of vin are used as
// scale factors; m4 is added unscaled (i.e. the fourth element of vin
// is never read and is effectively treated as 1).
//
//   vout  destination, written through STORE_ALIGNED_VECTOR
//   mat   4x4 float matrix, read through LOAD_ALIGNED_MATRIX
//   vin   input vector, read through LOAD_ALIGNED_VECTOR
//
// NOTE(review): the load/store macros are defined elsewhere; their names
// suggest all three arguments must be 16-byte aligned -- confirm.
// NOTE(review): vout is declared Vec3f, yet a whole vector float is handed
// to STORE_ALIGNED_VECTOR -- confirm Vec3f has room for four floats.
void Mat44MulVec(Vec3f vout, Mat44 mat, Vec4f vin)
{
        vector float vm_1, vm_2, vm_3, vm_4,
                     vec, vec_1, vec_2, vec_3,
                     vr, vr_1, vr_2, vr_3, v0;

        // Load matrix and vector
        LOAD_ALIGNED_MATRIX(mat, vm_1, vm_2, vm_3, vm_4);
        LOAD_ALIGNED_VECTOR(vec, vin);

        // All-zero addend for the first multiply-add
        v0 = (vector float) vec_splat_u32(0);

        // Broadcast each of the first three input elements across a full
        // vector so it can scale the matching matrix vector. vec already
        // holds the input (loaded above); it must not be reloaded here.
        vec_1 = vec_splat(vec, 0);
        vec_2 = vec_splat(vec, 1);
        vec_3 = vec_splat(vec, 2);

        // Do the vector x matrix multiplication as a chain of fused
        // multiply-adds, then add the fourth matrix vector unscaled.
        vr_1 = vec_madd(vm_1, vec_1, v0);
        vr_2 = vec_madd(vm_2, vec_2, vr_1);
        vr_3 = vec_madd(vm_3, vec_3, vr_2);
        vr   = vec_add(vr_3, vm_4);

        // Store back the result
        STORE_ALIGNED_VECTOR(vr, vout);
}