From ff48a2b7d5df88cc57bf6905738b4e9397db4533 Mon Sep 17 00:00:00 2001 From: slipher Date: Tue, 11 Jun 2024 14:42:48 -0500 Subject: [PATCH] Introduce runtime VBO layout; do IQM, MD5 Generate the layout for interleaved vertex attribute data at runtime. The motivation for this is to support OpenGL implementations that don't provide half float support (https://github.com/DaemonEngine/Daemon/pull/1179). The vertex "struct" may contain a 16-bit or 32-bit float, depending on the graphics card. Now, instead of defining a struct for the data to be uploaded into a VBO, one must separately specify inputs for each attribute. The input is defined by a type, base address, stride, etc.; very similarly to the arguments of glVertexAttribPointer itself. The new version of R_CreateStaticVBO takes these inputs and writes them to an interleaved format, performing any needed type conversions along the way. In this commit just skeletal models (IQM and MD5) are migrated to the new method. --- src/engine/renderer/tr_local.h | 25 ++- src/engine/renderer/tr_model_iqm.cpp | 44 ++-- src/engine/renderer/tr_model_skel.cpp | 57 ++--- src/engine/renderer/tr_public.h | 1 + src/engine/renderer/tr_vbo.cpp | 295 +++++++++++++++++--------- src/engine/sys/sdl_glimp.cpp | 6 + 6 files changed, 268 insertions(+), 160 deletions(-) diff --git a/src/engine/renderer/tr_local.h b/src/engine/renderer/tr_local.h index bbb4222ec9..2372b06223 100644 --- a/src/engine/renderer/tr_local.h +++ b/src/engine/renderer/tr_local.h @@ -660,8 +660,8 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; enum class vboLayout_t { + VBO_LAYOUT_CUSTOM, VBO_LAYOUT_VERTEX_ANIMATION, - VBO_LAYOUT_SKELETAL, VBO_LAYOUT_STATIC, VBO_LAYOUT_XYST }; @@ -672,13 +672,27 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; i16vec4_t *qtangent; u8vec4_t *color; union { f16vec2_t *st; vec2_t *stf; }; - int (*boneIndexes)[ 4 ]; - vec4_t *boneWeights; int numFrames; int numVerts; }; + enum + { + ATTR_OPTION_NORMALIZE = BIT( 0 ), + }; 
+ + struct vertexAttributeSpec_t + { + int attrIndex; + GLenum componentInputType; + GLenum componentStorageType; + const void *begin; + uint32_t numComponents; + uint32_t stride; + int attrOptions; + }; + struct VBO_t { char name[ 96 ]; // only for debugging with /listVBOs @@ -690,7 +704,7 @@ enum class realtimeLightingRenderer_t { LEGACY, TILED }; uint32_t vertexesNum; uint32_t framesNum; // number of frames for vertex animation - vboAttributeLayout_t attribs[ ATTR_INDEX_MAX ]; // info for buffer manipulation + std::array attribs; // info for buffer manipulation vboLayout_t layout; uint32_t attribBits; // Which attributes it has. Mostly for detecting errors @@ -3477,6 +3491,9 @@ inline bool checkGLErrors() ============================================================ */ + VBO_t *R_CreateStaticVBO( + Str::StringRef name, const vertexAttributeSpec_t *attrBegin, const vertexAttributeSpec_t *attrEnd, + uint32_t numVerts ); VBO_t *R_CreateStaticVBO( const char *name, vboData_t data, vboLayout_t layout ); VBO_t *R_CreateStaticVBO2( const char *name, int numVertexes, shaderVertex_t *verts, uint32_t stateBits ); diff --git a/src/engine/renderer/tr_model_iqm.cpp b/src/engine/renderer/tr_model_iqm.cpp index c2a6faba48..388b99ab30 100644 --- a/src/engine/renderer/tr_model_iqm.cpp +++ b/src/engine/renderer/tr_model_iqm.cpp @@ -770,21 +770,13 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, if( r_vboModels->integer && glConfig2.vboVertexSkinningAvailable && IQModel->num_joints <= glConfig2.maxVertexSkinningBones ) { - int *indexbuf = (int *)ri.Hunk_AllocateTempMemory( sizeof(int[4]) * IQModel->num_vertexes ); - for(int i = 0; i < IQModel->num_vertexes; i++ ) { - indexbuf[ 4 * i + 0 ] = IQModel->blendIndexes[ 4 * i + 0 ]; - indexbuf[ 4 * i + 1 ] = IQModel->blendIndexes[ 4 * i + 1 ]; - indexbuf[ 4 * i + 2 ] = IQModel->blendIndexes[ 4 * i + 2 ]; - indexbuf[ 4 * i + 3 ] = IQModel->blendIndexes[ 4 * i + 3 ]; - } + uint16_t *boneFactorBuf = 
(uint16_t*)ri.Hunk_AllocateTempMemory( IQModel->num_vertexes * ( 4 * sizeof(uint16_t) ) ); - const float weightscale = 1.0f / 255.0f; - float *weightbuf = (float *)ri.Hunk_AllocateTempMemory( sizeof(vec4_t) * IQModel->num_vertexes ); - for(int i = 0; i < IQModel->num_vertexes; i++ ) { - weightbuf[ 4 * i + 0 ] = weightscale * IQModel->blendWeights[ 4 * i + 0 ]; - weightbuf[ 4 * i + 1 ] = weightscale * IQModel->blendWeights[ 4 * i + 1 ]; - weightbuf[ 4 * i + 2 ] = weightscale * IQModel->blendWeights[ 4 * i + 2 ]; - weightbuf[ 4 * i + 3 ] = weightscale * IQModel->blendWeights[ 4 * i + 3 ]; + for (int i = 0; i < IQModel->num_vertexes; i++ ) { + boneFactorBuf[ 4 * i + 0 ] = uint16_t(IQModel->blendWeights[ 4 * i + 0 ]) << 8 | IQModel->blendIndexes[ 4 * i + 0 ]; + boneFactorBuf[ 4 * i + 1 ] = uint16_t(IQModel->blendWeights[ 4 * i + 1 ]) << 8 | IQModel->blendIndexes[ 4 * i + 1 ]; + boneFactorBuf[ 4 * i + 2 ] = uint16_t(IQModel->blendWeights[ 4 * i + 2 ]) << 8 | IQModel->blendIndexes[ 4 * i + 2 ]; + boneFactorBuf[ 4 * i + 3 ] = uint16_t(IQModel->blendWeights[ 4 * i + 3 ]) << 8 | IQModel->blendIndexes[ 4 * i + 3 ]; } i16vec4_t *qtangentbuf = static_cast( @@ -797,24 +789,20 @@ bool R_LoadIQModel( model_t *mod, const void *buffer, int filesize, qtangentbuf[ i ] ); } - vboData_t vboData{}; - - vboData.xyz = (vec3_t *)IQModel->positions; - vboData.qtangent = qtangentbuf; - vboData.numFrames = 0; - vboData.color = (u8vec4_t *)IQModel->colors; - vboData.st = (f16vec2_t *)IQModel->texcoords; - vboData.boneIndexes = (int (*)[4])indexbuf; - vboData.boneWeights = (vec4_t *)weightbuf; - vboData.numVerts = IQModel->num_vertexes; + const vertexAttributeSpec_t attrs[] { + { ATTR_INDEX_BONE_FACTORS, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, boneFactorBuf, 4, sizeof( u16vec4_t ), 0 }, + { ATTR_INDEX_POSITION, GL_FLOAT, GL_SHORT, IQModel->positions, 3, sizeof( float[ 3 ] ), ATTR_OPTION_NORMALIZE }, + { ATTR_INDEX_QTANGENT, GL_SHORT, GL_SHORT, qtangentbuf, 4, sizeof( i16vec4_t ), 
ATTR_OPTION_NORMALIZE, }, + { ATTR_INDEX_TEXCOORD, GL_HALF_FLOAT, GL_HALF_FLOAT, IQModel->texcoords, 2, sizeof( f16_t[ 2 ] ), 0 }, + { ATTR_INDEX_COLOR, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, IQModel->colors, 4, sizeof( u8vec4_t ), ATTR_OPTION_NORMALIZE }, + }; std::string name = mod->name; - vbo = R_CreateStaticVBO( ( "IQM surface VBO " + name ).c_str(), vboData, - vboLayout_t::VBO_LAYOUT_SKELETAL ); + vbo = R_CreateStaticVBO( "IQM surface VBO " + name, + std::begin( attrs ), std::end( attrs ), IQModel->num_vertexes ); ri.Hunk_FreeTempMemory( qtangentbuf ); - ri.Hunk_FreeTempMemory( weightbuf ); - ri.Hunk_FreeTempMemory( indexbuf ); + ri.Hunk_FreeTempMemory( boneFactorBuf ); // create IBO ibo = R_CreateStaticIBO( ( "IQM surface IBO " + name ).c_str(), diff --git a/src/engine/renderer/tr_model_skel.cpp b/src/engine/renderer/tr_model_skel.cpp index 662e70dbd2..8370dd59a6 100644 --- a/src/engine/renderer/tr_model_skel.cpp +++ b/src/engine/renderer/tr_model_skel.cpp @@ -92,6 +92,12 @@ bool R_AddTriangleToVBOTriangleList( return hasWeights; } +// index has to be in range 0-255, weight has to be >= 0 and <= 1 +static unsigned short boneFactor( int index, float weight ) { + int scaledWeight = lrintf( weight * 255.0F ); + return (unsigned short)( ( scaledWeight << 8 ) | index ); +} + srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( Str::StringRef surfName, const std::vector &vboTriangles, md5Model_t *md5, md5Surface_t *surf, int skinIndex, int boneReferences[ MAX_BONES ] ) @@ -118,15 +124,8 @@ srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( vboSurf->numIndexes = indexesNum; vboSurf->numVerts = vertexesNum; - vboData_t data{}; - - data.xyz = ( vec3_t * ) ri.Hunk_AllocateTempMemory( sizeof( *data.xyz ) * vertexesNum ); - data.qtangent = ( i16vec4_t * ) ri.Hunk_AllocateTempMemory( sizeof( i16vec4_t ) * vertexesNum ); - data.boneIndexes = ( int (*)[ 4 ] ) ri.Hunk_AllocateTempMemory( sizeof( *data.boneIndexes ) * vertexesNum ); - data.boneWeights = ( vec4_t * ) ri.Hunk_AllocateTempMemory( 
sizeof( *data.boneWeights ) * vertexesNum ); - data.st = ( f16vec2_t * ) ri.Hunk_AllocateTempMemory( sizeof( f16vec2_t ) * vertexesNum ); - data.numVerts = vertexesNum; - + i16vec4_t *qtangents = ( i16vec4_t * ) ri.Hunk_AllocateTempMemory( sizeof( i16vec4_t ) * vertexesNum ); + u16vec4_t *boneFactors = (u16vec4_t*)ri.Hunk_AllocateTempMemory( sizeof( u16vec4_t ) * vertexesNum ); indexes = ( glIndex_t * ) ri.Hunk_AllocateTempMemory( indexesNum * sizeof( glIndex_t ) ); vboSurf->numBoneRemap = 0; @@ -156,41 +155,45 @@ srfVBOMD5Mesh_t *R_GenerateMD5VBOSurface( for ( j = 0; j < vertexesNum; j++ ) { - VectorCopy( surf->verts[ j ].position, data.xyz[ j ] ); R_TBNtoQtangents( surf->verts[ j ].tangent, surf->verts[ j ].binormal, - surf->verts[ j ].normal, data.qtangent[ j ] ); - - Vector2Copy( surf->verts[ j ].texCoords, data.st[ j ] ); + surf->verts[ j ].normal, qtangents[ j ] ); for (unsigned k = 0; k < MAX_WEIGHTS; k++ ) { if ( k < surf->verts[ j ].numWeights ) { - data.boneIndexes[ j ][ k ] = vboSurf->boneRemap[ surf->verts[ j ].boneIndexes[ k ] ]; - data.boneWeights[ j ][ k ] = surf->verts[ j ].boneWeights[ k ]; + uint16_t boneIndex = vboSurf->boneRemap[ surf->verts[ j ].boneIndexes[ k ] ]; + boneFactors[ j ][ k ] = boneFactor( boneIndex, surf->verts[ j ].boneWeights[ k ] ); } else { - data.boneWeights[ j ][ k ] = 0; - data.boneIndexes[ j ][ k ] = 0; + boneFactors[ j ][ k ] = 0; } } } - vboSurf->vbo = R_CreateStaticVBO( ( "MD5 surface VBO " + surfName ).c_str(), data, vboLayout_t::VBO_LAYOUT_SKELETAL ); + // MD5 does not have color, but shaders always require the color vertex attribute, so we have + // to provide this 0 color. + // TODO: optimize a vertexAttributeSpec_t with 0 stride to use a non-array vertex attribute? 
+ // (although that would mess up the nice 32-bit size) + const byte dummyColor[ 4 ]{}; - vboSurf->ibo = R_CreateStaticIBO( ( "MD5 surface IBO " + surfName ).c_str(), indexes, indexesNum ); + vertexAttributeSpec_t attributes[] { + { ATTR_INDEX_BONE_FACTORS, GL_UNSIGNED_SHORT, GL_UNSIGNED_SHORT, boneFactors, 4, sizeof(u16vec4_t), 0 }, + { ATTR_INDEX_POSITION, GL_FLOAT, GL_SHORT, &surf->verts[ 0 ].position, 3, sizeof(md5Vertex_t), ATTR_OPTION_NORMALIZE }, + { ATTR_INDEX_QTANGENT, GL_SHORT, GL_SHORT, qtangents, 4, sizeof(i16vec4_t), ATTR_OPTION_NORMALIZE }, + { ATTR_INDEX_TEXCOORD, GL_HALF_FLOAT, GL_HALF_FLOAT, &surf->verts[ 0 ].texCoords, 2, sizeof(md5Vertex_t), 0 }, + { ATTR_INDEX_COLOR, GL_UNSIGNED_BYTE, GL_UNSIGNED_BYTE, dummyColor, 4, 0, ATTR_OPTION_NORMALIZE }, + }; + + vboSurf->vbo = R_CreateStaticVBO( "MD5 surface VBO " + surfName, + std::begin( attributes ), std::end( attributes ), vertexesNum ); - // MD5 does not have color, but shaders always request it and the skeletal animation - // vertex layout includes a color field, which is zeroed by default. 
- vboSurf->vbo->attribBits |= ATTR_COLOR; + vboSurf->ibo = R_CreateStaticIBO( ( "MD5 surface IBO " + surfName ).c_str(), indexes, indexesNum ); ri.Hunk_FreeTempMemory( indexes ); - ri.Hunk_FreeTempMemory( data.st ); - ri.Hunk_FreeTempMemory( data.boneWeights ); - ri.Hunk_FreeTempMemory( data.boneIndexes ); - ri.Hunk_FreeTempMemory( data.qtangent ); - ri.Hunk_FreeTempMemory( data.xyz ); + ri.Hunk_FreeTempMemory( boneFactors ); + ri.Hunk_FreeTempMemory( qtangents ); return vboSurf; } diff --git a/src/engine/renderer/tr_public.h b/src/engine/renderer/tr_public.h index 5178253f30..e72c4d3a15 100644 --- a/src/engine/renderer/tr_public.h +++ b/src/engine/renderer/tr_public.h @@ -128,6 +128,7 @@ struct glconfig2_t bool mapBufferRangeAvailable; bool syncAvailable; bool depthClampAvailable; + bool halfFloatVertexAvailable; bool realtimeLighting; bool shadowMapping; diff --git a/src/engine/renderer/tr_vbo.cpp b/src/engine/renderer/tr_vbo.cpp index 29e8aa9c2d..a6e358f85a 100644 --- a/src/engine/renderer/tr_vbo.cpp +++ b/src/engine/renderer/tr_vbo.cpp @@ -38,16 +38,6 @@ struct fmtVertexAnim2 { }; const GLsizei sizeVertexAnim2 = sizeof( struct fmtVertexAnim2 ); -// interleaved data: position, texcoord, colour, qtangent, bonefactors -struct fmtSkeletal { - i16vec4_t position; - f16vec2_t texcoord; - Color::Color32Bit colour; - i16vec4_t qtangents; - u16vec4_t boneFactors; -}; -const GLsizei sizeSkeletal = sizeof( struct fmtSkeletal ); - // interleaved data: position, colour, qtangent, texcoord // -> struct shaderVertex_t in tr_local.h const GLsizei sizeShaderVertex = sizeof( shaderVertex_t ); @@ -76,11 +66,6 @@ static uint32_t R_DeriveAttrBits( const vboData_t &data ) stateBits |= ATTR_TEXCOORD; } - if ( data.boneIndexes && data.boneWeights ) - { - stateBits |= ATTR_BONE_FACTORS; - } - if ( data.numFrames ) { if ( data.xyz ) @@ -139,47 +124,6 @@ static void R_SetAttributeLayoutsVertexAnimation( VBO_t *vbo ) vbo->vertexesSize = sizePart1 + sizePart2; } -static void 
R_SetAttributeLayoutsSkeletal( VBO_t *vbo ) -{ - vbo->attribs[ ATTR_INDEX_POSITION ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_POSITION ].componentType = GL_SHORT; - vbo->attribs[ ATTR_INDEX_POSITION ].normalize = GL_TRUE; - vbo->attribs[ ATTR_INDEX_POSITION ].ofs = offsetof( struct fmtSkeletal, position ); - vbo->attribs[ ATTR_INDEX_POSITION ].stride = sizeSkeletal; - vbo->attribs[ ATTR_INDEX_POSITION ].frameOffset = 0; - - vbo->attribs[ ATTR_INDEX_TEXCOORD ].numComponents = 2; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].componentType = GL_HALF_FLOAT; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].ofs = offsetof( struct fmtSkeletal, texcoord ); - vbo->attribs[ ATTR_INDEX_TEXCOORD ].stride = sizeSkeletal; - vbo->attribs[ ATTR_INDEX_TEXCOORD ].frameOffset = 0; - - vbo->attribs[ ATTR_INDEX_COLOR ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_COLOR ].componentType = GL_UNSIGNED_BYTE; - vbo->attribs[ ATTR_INDEX_COLOR ].normalize = GL_TRUE; - vbo->attribs[ ATTR_INDEX_COLOR ].ofs = offsetof( struct fmtSkeletal, colour ); - vbo->attribs[ ATTR_INDEX_COLOR ].stride = sizeSkeletal; - vbo->attribs[ ATTR_INDEX_COLOR ].frameOffset = 0; - - vbo->attribs[ ATTR_INDEX_QTANGENT ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_QTANGENT ].componentType = GL_SHORT; - vbo->attribs[ ATTR_INDEX_QTANGENT ].normalize = GL_TRUE; - vbo->attribs[ ATTR_INDEX_QTANGENT ].ofs = offsetof( struct fmtSkeletal, qtangents ); - vbo->attribs[ ATTR_INDEX_QTANGENT ].stride = sizeSkeletal; - vbo->attribs[ ATTR_INDEX_QTANGENT ].frameOffset = 0; - - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].numComponents = 4; - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].componentType = GL_UNSIGNED_SHORT; - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].normalize = GL_FALSE; - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].ofs = offsetof( struct fmtSkeletal, boneFactors ); - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].stride = sizeSkeletal; - vbo->attribs[ ATTR_INDEX_BONE_FACTORS ].frameOffset = 0; - 
- // total size - vbo->vertexesSize = sizeSkeletal * vbo->vertexesNum; -} - static void R_SetAttributeLayoutsStatic( VBO_t *vbo ) { vbo->attribs[ ATTR_INDEX_POSITION ].numComponents = 3; @@ -240,10 +184,6 @@ static void R_SetVBOAttributeLayouts( VBO_t *vbo ) { R_SetAttributeLayoutsVertexAnimation( vbo ); } - else if ( vbo->layout == vboLayout_t::VBO_LAYOUT_SKELETAL ) - { - R_SetAttributeLayoutsSkeletal( vbo ); - } else if ( vbo->layout == vboLayout_t::VBO_LAYOUT_STATIC ) { R_SetAttributeLayoutsStatic( vbo ); @@ -259,11 +199,23 @@ static void R_SetVBOAttributeLayouts( VBO_t *vbo ) } } -// index has to be in range 0-255, weight has to be >= 0 and <= 1 -static unsigned short -boneFactor( int index, float weight ) { - int scaledWeight = lrintf( weight * 255.0F ); - return (unsigned short)( ( scaledWeight << 8 ) | index ); +static uint32_t ComponentSize( GLenum type ) +{ + switch ( type ) + { + case GL_UNSIGNED_BYTE: + return 1; + + case GL_SHORT: + case GL_UNSIGNED_SHORT: + case GL_HALF_FLOAT: + return 2; + + case GL_FLOAT: + return 4; + } + + Sys::Error( "VBO ComponentSize: unknown type %d", type ); } static void R_CopyVertexData( VBO_t *vbo, byte *outData, vboData_t inData ) @@ -295,42 +247,7 @@ static void R_CopyVertexData( VBO_t *vbo, byte *outData, vboData_t inData ) for ( v = 0; v < vbo->vertexesNum; v++ ) { - if ( vbo->layout == vboLayout_t::VBO_LAYOUT_SKELETAL ) { - struct fmtSkeletal *ptr = ( struct fmtSkeletal * )outData; - if ( ( vbo->attribBits & ATTR_POSITION ) ) - { - vec4_t tmp; - VectorCopy( inData.xyz[ v ], tmp); - tmp[ 3 ] = 1.0f; // unused - - floatToSnorm16( tmp, ptr[ v ].position ); - } - - if ( ( vbo->attribBits & ATTR_TEXCOORD ) ) - { - Vector2Copy( inData.st[ v ], ptr[ v ].texcoord ); - } - - if ( ( vbo->attribBits & ATTR_COLOR ) ) - { - ptr[ v ].colour = Color::Adapt( inData.color[ v ] ); - } - - if ( ( vbo->attribBits & ATTR_QTANGENT ) ) - { - Vector4Copy( inData.qtangent[ v ], ptr[ v ].qtangents ); - } - - if ( ( vbo->attribBits & 
ATTR_BONE_FACTORS ) ) - { - uint32_t j; - - for ( j = 0; j < 4; j++ ) { - ptr[ v ].boneFactors[ j ] = boneFactor( inData.boneIndexes[ v ][ j ], - inData.boneWeights[ v ][ j ] ); - } - } - } else if ( vbo->layout == vboLayout_t::VBO_LAYOUT_XYST ) { + if ( vbo->layout == vboLayout_t::VBO_LAYOUT_XYST ) { vec2_t *ptr = ( vec2_t * )outData; if ( ( vbo->attribBits & ATTR_POSITION ) ) { @@ -475,6 +392,182 @@ VBO_t *R_CreateDynamicVBO( const char *name, int numVertexes, uint32_t stateBits return vbo; } +static void CopyVertexAttribute( + const vboAttributeLayout_t &attrib, const vertexAttributeSpec_t &spec, + uint32_t count, byte *interleavedData ) +{ + if ( count == 0 ) + { + return; // some loops below are 'do/while'-like + } + + const size_t inStride = spec.stride; + const size_t outStride = attrib.stride; + + byte *out = interleavedData + attrib.ofs; + const byte *in = reinterpret_cast( spec.begin ); + + if ( attrib.componentType == spec.componentInputType ) + { + uint32_t size = attrib.numComponents * ComponentSize( attrib.componentType ); + + for ( uint32_t v = count; ; ) + { + memcpy( out, in, size ); + + if ( --v == 0 ) break; + in += inStride; + out += outStride; + } + } + else if ( spec.componentInputType == GL_FLOAT && attrib.componentType == GL_HALF_FLOAT ) + { + for ( uint32_t v = count; ; ) + { + const float *single = reinterpret_cast( in ); + f16_t *half = reinterpret_cast( out ); + for ( uint32_t n = spec.numComponents; n--; ) + { + *half++ = floatToHalf( *single++ ); + } + + if ( --v == 0 ) break; + in += inStride; + out += outStride; + } + } + else if ( spec.componentInputType == GL_HALF_FLOAT && attrib.componentType == GL_FLOAT ) + { + for ( uint32_t v = count; ; ) + { + const f16_t *half = reinterpret_cast( in ); + float *single = reinterpret_cast( out ); + for ( uint32_t n = spec.numComponents; n--; ) + { + *single++ = halfToFloat( *half++ ); + } + + if ( --v == 0 ) break; + in += inStride; + out += outStride; + } + } + else if ( 
spec.componentInputType == GL_FLOAT && attrib.componentType == GL_SHORT + && spec.attrOptions & ATTR_OPTION_NORMALIZE ) + { + for ( uint32_t v = count; ; ) + { + const float *single = reinterpret_cast( in ); + int16_t *snorm = reinterpret_cast( out ); + for ( uint32_t n = spec.numComponents; n--; ) + { + *snorm++ = floatToSnorm16( *single++ ); + } + + if ( --v == 0 ) break; + in += inStride; + out += outStride; + } + } + else if ( spec.componentInputType == GL_FLOAT && attrib.componentType == GL_UNSIGNED_SHORT + && spec.attrOptions & ATTR_OPTION_NORMALIZE ) + { + for ( uint32_t v = count; ; ) + { + const float *single = reinterpret_cast( in ); + uint16_t *unorm = reinterpret_cast( out ); + for ( uint32_t n = spec.numComponents; n--; ) + { + *unorm++ = floatToUnorm16( *single++ ); + } + + if ( --v == 0 ) break; + in += inStride; + out += outStride; + } + } + else + { + Sys::Error( "Unsupported GL type conversion (%d to %d)", + spec.componentInputType, attrib.componentType ); + } +} + +VBO_t *R_CreateStaticVBO( + Str::StringRef name, + const vertexAttributeSpec_t *attrBegin, const vertexAttributeSpec_t *attrEnd, + uint32_t numVerts ) +{ + // make sure the render thread is stopped + R_SyncRenderThread(); + + VBO_t *vbo = (VBO_t*) ri.Hunk_Alloc( sizeof( *vbo ), ha_pref::h_low ); + *vbo = {}; + tr.vbos.push_back( vbo ); + + Q_strncpyz( vbo->name, name.c_str(), sizeof(vbo->name)); + vbo->vertexesNum = numVerts; + vbo->usage = GL_STATIC_DRAW; + + glGenBuffers( 1, &vbo->vertexesVBO ); + R_BindVBO( vbo ); + + uint32_t ofs = 0; + + for ( const vertexAttributeSpec_t *spec = attrBegin; spec != attrEnd; ++spec ) + { + vboAttributeLayout_t &attrib = vbo->attribs[ spec->attrIndex ]; + ASSERT_EQ( attrib.numComponents, 0 ); + ASSERT_NQ( spec->numComponents, 0U ); + attrib.componentType = spec->componentStorageType; + if ( attrib.componentType == GL_HALF_FLOAT && !glConfig2.halfFloatVertexAvailable ) + { + attrib.componentType = GL_FLOAT; + } + attrib.numComponents = 
spec->numComponents; + attrib.ofs = ofs; + attrib.normalize = spec->attrOptions & ATTR_OPTION_NORMALIZE ? GL_TRUE : GL_FALSE; + + ofs += attrib.numComponents * ComponentSize( attrib.componentType ); + ofs = ( ofs + 3 ) & ~3; + } + + for ( int i = 0; i < ATTR_INDEX_MAX; i++ ) + { + if ( vbo->attribs[ i ].numComponents ) + { + vbo->attribs[ i ].stride = ofs; + vbo->attribBits |= 1 << i; + } + } + + vbo->vertexesSize = numVerts * ofs; + + // TODO: does it really need to be interleaved? + byte *interleavedData = (byte *)ri.Hunk_AllocateTempMemory( vbo->vertexesSize ); + + for ( const vertexAttributeSpec_t *spec = attrBegin; spec != attrEnd; ++spec ) + { + CopyVertexAttribute( vbo->attribs[ spec->attrIndex ], *spec, numVerts, interleavedData ); + } + +#ifdef GL_ARB_buffer_storage + if( glConfig2.bufferStorageAvailable ) { + glBufferStorage( GL_ARRAY_BUFFER, vbo->vertexesSize, interleavedData, 0 ); + } else +#endif + { + glBufferData( GL_ARRAY_BUFFER, vbo->vertexesSize, interleavedData, vbo->usage ); + } + + ri.Hunk_FreeTempMemory( interleavedData ); + + R_BindNullVBO(); + GL_CheckErrors(); + + return vbo; +} + /* ============ R_CreateVBO diff --git a/src/engine/sys/sdl_glimp.cpp b/src/engine/sys/sdl_glimp.cpp index 868c3eff15..323c12435e 100644 --- a/src/engine/sys/sdl_glimp.cpp +++ b/src/engine/sys/sdl_glimp.cpp @@ -80,6 +80,8 @@ static Cvar::Cvar r_arb_gpu_shader5( "r_arb_gpu_shader5", "Use GL_ARB_gpu_shader5 if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_half_float_pixel( "r_arb_half_float_pixel", "Use GL_ARB_half_float_pixel if available", Cvar::NONE, true ); +static Cvar::Cvar r_arb_half_float_vertex( "r_arb_half_float_vertex", + "Use GL_ARB_half_float_vertex if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_indirect_parameters( "r_arb_indirect_parameters", "Use GL_ARB_indirect_parameters if available", Cvar::NONE, true ); static Cvar::Cvar r_arb_internalformat_query2( "r_arb_internalformat_query2", @@ -1972,6 +1974,7 @@ static void 
GLimp_InitExtensions() Cvar::Latch( r_arb_framebuffer_object ); Cvar::Latch( r_arb_gpu_shader5 ); Cvar::Latch( r_arb_half_float_pixel ); + Cvar::Latch( r_arb_half_float_vertex ); Cvar::Latch( r_arb_indirect_parameters ); Cvar::Latch( r_arb_internalformat_query2 ); Cvar::Latch( r_arb_map_buffer_range ); @@ -2210,6 +2213,9 @@ static void GLimp_InitExtensions() // VAO and VBO // made required in OpenGL 3.0 LOAD_EXTENSION( ExtFlag_REQUIRED | ExtFlag_CORE, ARB_half_float_vertex ); + // WIP: engine is not yet usable without this + glConfig2.halfFloatVertexAvailable = LOAD_EXTENSION_WITH_TEST( + ExtFlag_CORE, ARB_half_float_vertex, r_arb_half_float_vertex.Get() ); if ( !workaround_glExtension_missingArbFbo_useExtFbo.Get() ) {