From 73707eb1ad0b1573b169190e39844d010453d251 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 26 Jun 2024 12:23:35 +0100 Subject: [PATCH] GS/HW: Further fixes to RT in RT - Still a ways to go... --- pcsx2/GS/GSState.h | 1 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 91 +++++++++++----- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 128 ++++++++++++++++------- 3 files changed, 151 insertions(+), 69 deletions(-) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 1ab92defe57563..56d80a51ef8163 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -225,6 +225,7 @@ class GSState : public GSAlignedClass<32> bool m_isPackedUV_HackFlag = false; bool m_channel_shuffle = false; bool m_in_target_draw = false; + u32 m_target_offset = 0; u8 m_scanmask_used = 0; u32 m_dirty_gs_regs = 0; int m_backed_up_ctx = 0; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 75e6b559fd4f34..3251c00ba20dbe 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -2074,6 +2074,8 @@ void GSRendererHW::Draw() if (num_skipped_channel_shuffle_draws > 0) GL_INS("Skipped %u channel shuffle draws", num_skipped_channel_shuffle_draws); num_skipped_channel_shuffle_draws = 0; + m_last_channel_shuffle_fbp = 0xffff; + m_last_channel_shuffle_end_block = 0xffff; #else if (m_channel_shuffle) return; @@ -2723,18 +2725,21 @@ void GSRendererHW::Draw() const GSLocalMemory::psm_t& frame_psm = GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM]; m_in_target_draw = false; + m_target_offset = 0; if (!no_rt) { + const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && + GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || + IsPossibleChannelShuffle()); + // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; - FRAME_TEX0.TBP0 = m_cached_ctx.FRAME.Block(); - FRAME_TEX0.TBW = (m_channel_shuffle && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + FRAME_TEX0.TBP0 = ((m_last_channel_shuffle_end_block + 1) == m_cached_ctx.FRAME.Block() && possible_shuffle) ? m_last_channel_shuffle_fbp : m_cached_ctx.FRAME.Block(); + FRAME_TEX0.TBW = (possible_shuffle && (m_last_channel_shuffle_end_block + 1) && src->m_target) ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; FRAME_TEX0.PSM = m_cached_ctx.FRAME.PSM; - const bool possible_shuffle = draw_sprite_tex && (((src && src->m_target && src->m_from_target && src->m_from_target->m_32_bits_fmt) && - GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) || - IsPossibleChannelShuffle()); + // Don't clamp on shuffle, the height cache may troll us with the REAL height. if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); @@ -2749,12 +2754,10 @@ void GSRendererHW::Draw() // Of course if this size is different (in width) or this is a shuffle happening, this will be bypassed. const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); - m_in_target_draw = false; - rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src); - + // Draw skipped because it was a clear and there was no target. if (!rt) { @@ -2788,12 +2791,14 @@ void GSRendererHW::Draw() return; } } - else if (rt->m_TEX0.TBP0 != FRAME_TEX0.TBP0) // Must have done rt in rt + else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) // Must have done rt in rt { GSVertex* v = &m_vertex.buff[0]; - u32 vertical_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. + int vertical_offset = ((std::abs(static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) / std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.y; // I know I could just not shift it.. - const u32 horizontal_offset = (((FRAME_TEX0.TBP0 - rt->m_TEX0.TBP0) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + const int horizontal_offset = (std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5) % std::max(rt->m_TEX0.TBW, 1U)) * frame_psm.pgs.x; + // Used to reduce the offset made later in channel shuffles + m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); for (u32 i = 0; i < m_vertex.tail; i++) { @@ -2816,6 +2821,16 @@ void GSRendererHW::Draw() m_vt.m_max.p.y += vertical_offset; t_size.x = rt->m_unscaled_size.x - horizontal_offset; t_size.y = rt->m_unscaled_size.y - vertical_offset; + + if (t_size.y <= 0) + { + u32 new_height = m_r.w; + + //DevCon.Warning("Resizing texture %d x %d draw %d", rt->m_unscaled_size.x, new_height, s_n); + rt->ResizeTexture(rt->m_unscaled_size.x, new_height); + rt->UpdateValidity(m_r, true); + rt->UpdateDrawn(m_r, true); + } } if (src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) @@ -2838,6 +2853,8 @@ void GSRendererHW::Draw() // If it's a new target, we don't know where the end is as it's starting on a shuffle, so just do every shuffle following. m_last_channel_shuffle_end_block = (rt->m_last_draw >= s_n) ? (MAX_BLOCKS - 1) : (rt->m_end_block < rt->m_TEX0.TBP0 ? (rt->m_end_block + MAX_BLOCKS) : rt->m_end_block); } + else + m_last_channel_shuffle_end_block = 0xFFFF; } GSTextureCache::Target* ds = nullptr; @@ -2846,12 +2863,14 @@ void GSRendererHW::Draw() { ZBUF_TEX0.U64 = 0; ZBUF_TEX0.TBP0 = m_cached_ctx.ZBUF.Block(); - ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + ZBUF_TEX0.TBW = m_cached_ctx.FRAME.FBW; ZBUF_TEX0.PSM = m_cached_ctx.ZBUF.PSM; ds = g_texture_cache->LookupTarget(ZBUF_TEX0, t_size, target_scale, GSTextureCache::DepthStencil, m_cached_ctx.DepthWrite(), 0, false, force_preload, preserve_depth, preserve_depth, unclamped_draw_rect, IsPossibleChannelShuffle(), is_possible_mem_clear && ZBUF_TEX0.TBP0 != m_cached_ctx.FRAME.Block()); + ZBUF_TEX0.TBW = m_channel_shuffle ? src->m_from_target_TEX0.TBW : m_cached_ctx.FRAME.FBW; + if (!ds) { ds = g_texture_cache->CreateTarget(ZBUF_TEX0, t_size, GetValidSize(src), target_scale, GSTextureCache::DepthStencil, @@ -3142,7 +3161,10 @@ void GSRendererHW::Draw() rt->m_valid_alpha_high = false; } if (FRAME_TEX0.TBW != 1 || (m_r.width() > frame_psm.pgs.x || m_r.height() > frame_psm.pgs.y)) + { rt->m_TEX0 = FRAME_TEX0; + + } } if (ds && (!is_possible_mem_clear || ds->m_TEX0.PSM != ZBUF_TEX0.PSM || (rt && ds->m_TEX0.TBW != rt->m_TEX0.TBW)) && !m_in_target_draw) @@ -3296,7 +3318,7 @@ void GSRendererHW::Draw() } } } - else if (!m_in_target_draw) + else { // RT and DS sizes need to match, even if we're not doing any resizing. const int new_w = std::max(rt ? rt->m_unscaled_size.x : 0, ds ? ds->m_unscaled_size.x : 0); @@ -3519,6 +3541,8 @@ void GSRendererHW::Draw() if (rt) rt->m_last_draw = s_n; + if (ds) + ds->m_last_draw = s_n; #ifdef DISABLE_HW_TEXTURE_CACHE if (rt) g_texture_cache->Read(rt, real_rect); @@ -3973,7 +3997,7 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool m_conf.ps.urban_chaos_hle = 1; } } - else if (m_index.tail <= 64 && m_cached_ctx.CLAMP.WMT == 3) + else if (m_index.tail < 64 && m_cached_ctx.CLAMP.WMT == 3) { // Blood will tell. I think it is channel effect too but again // implemented in a different way. I don't want to add more CRC stuff. So @@ -4147,16 +4171,24 @@ __ri bool GSRendererHW::EmulateChannelShuffle(GSTextureCache::Target* src, bool } else { + const GSLocalMemory::psm_t frame_psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + const u32 frame_page_offset = std::max(static_cast(((m_r.x / frame_psm.pgs.x) + (m_r.y / frame_psm.pgs.y) * src->m_TEX0.TBW) - m_target_offset), 0); + m_r = GSVector4i(m_r.x & ~(frame_psm.pgs.x - 1), m_r.y & ~(frame_psm.pgs.y - 1), (m_r.z + (frame_psm.pgs.x - 1)) & ~(frame_psm.pgs.x - 1), (m_r.w + (frame_psm.pgs.y - 1)) & ~(frame_psm.pgs.y - 1)); + m_cached_ctx.FRAME.FBP += frame_page_offset; + m_in_target_draw |= frame_page_offset > 0; GSVertex* s = &m_vertex.buff[0]; s[0].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.x << 4)); s[1].XYZ.X = static_cast(m_context->XYOFFSET.OFX + (m_r.z << 4)); s[0].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.y << 4)); s[1].XYZ.Y = static_cast(m_context->XYOFFSET.OFY + (m_r.w << 4)); - s[0].U = (m_r.x << 4); - s[1].U = (m_r.z << 4); - s[0].V = (m_r.y << 4); - s[1].V = (m_r.w << 4); + const GSLocalMemory::psm_t tex_psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const u32 tex_page_offset = (m_vt.m_min.t.x / tex_psm.pgs.x) + (m_vt.m_min.t.y / tex_psm.pgs.y); + m_cached_ctx.TEX0.TBP0 += tex_page_offset << 5; + s[0].U = m_r.x << 4; + s[1].U = m_r.z << 4; + s[0].V = m_r.y << 4; + s[1].V = m_r.w << 4; m_last_channel_shuffle_fbmsk = 0xFFFFFFFF; } @@ -5239,7 +5271,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c const int frame_diff = rt ? static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) : 0; // Detect framebuffer read that will need special handling const GSTextureCache::Target* src_target = nullptr; - if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && tex_diff != frame_diff)) + if (m_conf.tex == m_conf.rt && !(m_channel_shuffle && tex && (tex_diff != frame_diff || target_region))) { // Can we read the framebuffer directly? (i.e. sample location matches up). if (CanUseTexIsFB(rt, tex, tmm)) @@ -5297,7 +5329,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c GSVector4i copy_range; GSVector2i copy_size; GSVector2i copy_dst_offset; - + bool copied_rt = false; // Shuffles take the whole target. This should've already been halved. // We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above. // Restricting it also breaks Tom and Jerry... @@ -5313,11 +5345,14 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c u32 page_offset = (m_cached_ctx.TEX0.TBP0 - src_target->m_TEX0.TBP0) >> 5; u32 vertical_offset = (page_offset / src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.y; u32 horizontal_offset = (page_offset % src_target->m_TEX0.TBW) * GSLocalMemory::m_psm[src_target->m_TEX0.PSM].pgs.x; + copy_range.y += vertical_offset; copy_range.x += horizontal_offset; copy_size.y -= vertical_offset; copy_size.x -= horizontal_offset; - + target_region = false; + source_region.bits = 0; + //copied_rt = tex->m_from_target != nullptr; if (m_in_target_draw) { copy_size.x = m_r.width(); @@ -5413,12 +5448,9 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c static_cast(std::ceil(static_cast(copy_dst_offset.y) * scale))); src_copy.reset(src_target->m_texture->IsDepthStencil() ? - g_gs_device->CreateDepthStencil( - scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : - (m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, - true) : - g_gs_device->CreateTexture( - scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true))); + g_gs_device->CreateDepthStencil(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) : + (m_downscale_source || copied_rt) ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), true, true) : + g_gs_device->CreateTexture(scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)); if (!src_copy) [[unlikely]] { Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y); @@ -5426,6 +5458,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c m_conf.ps.tfx = 4; return; } + if (m_downscale_source) { g_perfmon.Put(GSPerfMon::TextureCopies, 1); @@ -7266,7 +7299,7 @@ void GSRendererHW::ClearGSLocalMemory(const GSOffset& off, const GSVector4i& r, bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Source* tex, const GSVector4i& r_draw) { - if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) + /*if (r_draw.w > 1024 && (m_vt.m_primclass == GS_SPRITE_CLASS) && (m_vertex.next == 2) && m_process_texture && !PRIM->ABE && tex && !tex->m_target && m_cached_ctx.TEX0.TBW > 0) { GL_PUSH("OI_BlitFMV"); @@ -7320,7 +7353,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc g_texture_cache->InvalidateVideoMemSubTarget(_rt); return false; // skip current draw - } + }*/ // Nothing to see keep going return true; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 504e4e2df22349..b346b8209afdf1 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -18,6 +18,7 @@ #include "fmt/format.h" #include +#include #ifdef __APPLE__ #include @@ -1094,8 +1095,8 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const block_boundary_rect.x = block_boundary_rect.x & ~(psm_s.bs.x - 1); block_boundary_rect.y = block_boundary_rect.y & ~(psm_s.bs.y - 1); // Round up to the nearst block boundary for lookup to avoid problems due to bilinear and inclusive rects. - block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x - 2)) & ~(psm_s.bs.x - 1)); - block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y - 2)) & ~(psm_s.bs.y - 1)); + block_boundary_rect.z = std::max(req_rect.x + 1, (block_boundary_rect.z + (psm_s.bs.x / 2)) & ~(psm_s.bs.x - 1)); + block_boundary_rect.w = std::max(req_rect.y + 1, (block_boundary_rect.w + (psm_s.bs.y / 2)) & ~(psm_s.bs.y - 1)); // Arc the Lad finds the wrong surface here when looking for a depth stencil. // Since we're currently not caching depth stencils (check ToDo in CreateSource) we should not look for it here. @@ -1117,7 +1118,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (((bp & (BLOCKS_PER_PAGE - 1)) != (t->m_TEX0.TBP0 & (BLOCKS_PER_PAGE - 1))) && (bp & (BLOCKS_PER_PAGE - 1))) continue; - const bool overlaps = t->Overlaps(bp, bw, psm, block_boundary_rect); + const bool overlaps = t->Inside(bp, bw, psm, block_boundary_rect); // Try to make sure the target has available what we need, be careful of self referencing frames with font in the alpha. // Also is we have already found a target which we had to offset in to by using a region or exact address, @@ -1490,7 +1491,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Fixes Jak eyes rendering. // Fixes Xenosaga 3 last dungeon graphic bug. // Fixes Pause menu in The Getaway. - const bool can_translate = CanTranslate(bp, bw, src_psm, new_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW); + const bool can_translate = CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW); if (can_translate) { const bool swizzle_match = GSLocalMemory::m_psm[src_psm].depth == GSLocalMemory::m_psm[t->m_TEX0.PSM].depth; @@ -1500,7 +1501,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (swizzle_match) { - rect = TranslateAlignedRectByPage(t, bp, src_psm, bw, new_rect); + rect = TranslateAlignedRectByPage(t, bp, psm, bw, new_rect); rect.x -= new_rect.x; rect.y -= new_rect.y; } @@ -1525,7 +1526,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const rect.y -= new_rect.y & ~(page_size.y - 1); } - rect = rect.rintersect(t->m_valid); + //rect = rect.rintersect(t->m_valid); if (rect.rempty()) continue; @@ -1646,8 +1647,6 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (!found_t && !dst && !GSConfig.UserHacks_DisableDepthSupport) { - GSVector4i new_rect = req_rect; - // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad testcase // @@ -1833,8 +1832,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe bool can_use = true; if (dst && (GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw)) + { + DevCon.Warning("Ignoring target at %x as one at %x is newer", t->m_TEX0.TBP0, dst->m_TEX0.TBP0); continue; - + } + // if It's an old target and it's being completely overwritten, kill it. // Dragon Quest 8 reuses a render-target sized buffer as a single-page buffer, without clearing it. But, // it does dirty it by writing over the 64x64 region. So while we can't use this heuristic for tossing @@ -1879,12 +1881,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe dst->m_32_bits_fmt |= (psm_s.bpp != 16); - if (FindOverlappingTarget(dst)) + /*if (FindOverlappingTarget(dst)) continue; - else + else*/ break; } - else + else if(!(src && src->m_from_target == t)) { GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); InvalidateSourcesFromTarget(t); @@ -1893,14 +1895,34 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + else if(GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { - //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); - dst = t; + const u32 widthpage_offset = (std::abs(static_cast(bp - t->m_TEX0.TBP0)) >> 5) % std::max(t->m_TEX0.TBW, 1U); + const bool is_aligned_ok = widthpage_offset == 0 || ((widthpage_offset + TEX0.TBW) <= t->m_TEX0.TBW) || min_rect.width() <= 64 || (widthpage_offset == (t->m_TEX0.TBW >> 1) && (static_cast(min_rect.width()) <= (widthpage_offset * 64))); + if ((!dst || ((GSState::s_n - dst->m_last_draw) < (GSState::s_n - t->m_last_draw))) && is_aligned_ok && (t->m_TEX0.TBW == TEX0.TBW || (TEX0.TBW == 1 && t->m_TEX0.TBW > 1)) && t->Inside(bp, TEX0.TBW, TEX0.PSM, min_rect)) + { + // If it's too old, it's probably not a real target to jump in to anymore. + if ((GSState::s_n - t->m_last_draw) > 10 && (!t->m_dirty.empty() || (!is_shuffle && + !(widthpage_offset == 0/*TEX0.TBP0 == ((((t->UnwrappedEndBlock() + 1) - t->m_TEX0.TBP0) >> 1) + t->m_TEX0.TBP0)*/ || min_rect.width() <= 64 || + (widthpage_offset == (t->m_TEX0.TBW >> 1) && min_rect.width() == widthpage_offset * 64))))) + { + GL_INS("TC: Deleting RT BP 0x%x BW %d PSM %s due to change in target", t->m_TEX0.TBP0, t->m_TEX0.TBW, psm_str(t->m_TEX0.PSM)); + InvalidateSourcesFromTarget(t); + i = list.erase(i); + delete t; + } + else + { + //DevCon.Warning("Here draw %d wanted %x PSM %x got %x PSM %x offset of %d pages width %d pages draw width %d", GSState::s_n, bp, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM, (bp - t->m_TEX0.TBP0) >> 5, t->m_TEX0.TBW, draw_rect.width()); + dst = t; - dst->m_32_bits_fmt |= (psm_s.bpp != 16); - //Continue just in case there's a newer target - continue; + dst->m_32_bits_fmt |= (psm_s.bpp != 16); + //Continue just in case there's a newer target + if (used) + list.MoveFront(i.Index()); + break; + } + } } } } @@ -2046,6 +2068,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe { calcRescale(dst); GSTexture* tex = g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, false); + if (!tex) + return nullptr; g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, ShaderConvert::FLOAT32_TO_FLOAT24, false); g_perfmon.Put(GSPerfMon::TextureCopies, 1); g_gs_device->Recycle(dst->m_texture); @@ -2086,10 +2110,11 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe GL_INS("TC Convert to 16bit: %dx%d: %dx%d @ %f -> %dx%d @ %f", dst->m_unscaled_size.x, dst->m_unscaled_size.y, dst->m_texture->GetWidth(), dst->m_texture->GetHeight(), dst->m_scale, new_scaled_size.x, new_scaled_size.y, scale); - DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); + //DevCon.Warning("Scale %s draw %d", scale_down ? "down" : "up", GSState::s_n); GSTexture* tex = type == RenderTarget ? g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true) : g_gs_device->CreateDepthStencil(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::DepthStencil, true); - + if (!tex) + return nullptr; m_target_memory_usage += tex->GetMemUsage(); g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false); @@ -2674,8 +2699,8 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons { auto j = i; Target* t = *j; - - if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM && t->m_TEX0.TBW > 4) + + if (dst != t && t->m_TEX0.TBW == dst->m_TEX0.TBW && t->m_TEX0.PSM == dst->m_TEX0.PSM /*&& t->m_TEX0.TBW >= 4*/) if (t->Overlaps(dst->m_TEX0.TBP0, dst->m_TEX0.TBW, dst->m_TEX0.PSM, dst->m_valid)) { // could be overwriting a double buffer, so if it's the second half of it, just reduce the size down to half. @@ -2703,14 +2728,21 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons const int dst_offset = ((((t->m_TEX0.TBP0 - dst->m_TEX0.TBP0) >> 5) / dst->m_TEX0.TBW) * GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y); + if ((dst_offset + t->m_valid.w) > dst->m_unscaled_size.y && !dst->ResizeTexture(dst->m_unscaled_size.x, (dst_offset + t->m_valid.w), true)) + { + // Resize failed, probably ran out of VRAM, better luck next time. Fall back to CPU. + // We injected the new height into the cache, so hopefully won't happen again. + continue; + } + const int dst_offset_scaled = dst_offset * dst->m_scale; - const GSVector4i dst_rect = GSVector4i(t->m_valid.x, dst_offset, t->m_valid.z, dst_offset + overlapping_pages_height); + const GSVector4i dst_rect = GSVector4i(t->m_valid.x, dst_offset, t->m_valid.z, dst_offset + t->m_valid.w); if (((!hw_clear && (preserve_target || preload)) || dst_rect.rintersect(draw_rect).rempty()) && dst->GetScale() == t->GetScale()) { const int copy_width = (t->m_texture->GetWidth()) > (dst->m_texture->GetWidth()) ? (dst->m_texture->GetWidth()) : t->m_texture->GetWidth(); - const int copy_height = overlapping_pages_height * t->m_scale; + const int copy_height = t->m_valid.w * t->m_scale; - GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled); + GL_INS("RT double buffer copy from FBP 0x%x, %dx%d => %d,%d", t->m_TEX0.TBP0, copy_width, copy_height, 0, dst_offset_scaled); pxAssert(copy_width <= dst->GetTexture()->GetWidth() && copy_height <= dst->GetTexture()->GetHeight() && copy_width <= t->GetTexture()->GetWidth() && copy_height <= t->GetTexture()->GetHeight()); @@ -2719,6 +2751,12 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons // Clear the dirty first dst->Update(); + + if ((dst_offset + copy_height) > dst->m_unscaled_size.y) + DevCon.Warning("Way too tall draw %d", GSState::s_n); + + if ((copy_height) > t->m_unscaled_size.y) + DevCon.Warning("Way too tall for src draw %d", GSState::s_n); // Invalidate has been moved to after DrawPrims(), because we might kill the current sources' backing. if (!t->m_valid_rgb || !(t->m_valid_alpha_high || t->m_valid_alpha_low) || t->m_scale != dst->m_scale) { @@ -2742,18 +2780,13 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if ((overlapping_pages < rt_pages)) { // This should never happen as we're making a new target so the src should never be something it overlaps, but just incase.. - GSVector4i new_valid = t->m_valid; - new_valid.y = std::max(new_valid.y + overlapping_pages_height, 0); - t->m_TEX0.TBP0 += (overlapping_pages_height / GSLocalMemory::m_psm[t->m_TEX0.PSM].pgs.y) << 5; - t->ResizeValidity(new_valid); - } - else - { - InvalidateSourcesFromTarget(t); - i = list.erase(j); - delete t; + GSVector4i new_valid = dst->m_valid.runion(GSVector4i(t->m_valid.x, t->m_valid.y + dst_offset, t->m_valid.z, t->m_valid.w + dst_offset)); + dst->UpdateValidity(new_valid); } - return hw_clear.value_or(false); + + InvalidateSourcesFromTarget(t); + i = list.erase(j); + delete t; } } i++; @@ -2955,7 +2988,7 @@ void GSTextureCache::ScaleTargetForDisplay(Target* t, const GIFRegTEX0& dispfb, } // Inject the new size back into the cache. - GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, 0, static_cast(needed_height)); + GetTargetSize(t->m_TEX0.TBP0, t->m_TEX0.TBW, t->m_TEX0.PSM, new_width, static_cast(needed_height)); } float GSTextureCache::ConvertColorToDepth(u32 c, ShaderConvert convert) @@ -4408,7 +4441,10 @@ void GSTextureCache::ReplaceSourceTexture(Source* s, GSTexture* new_texture, flo if (s->m_from_hash_cache) s->m_from_hash_cache->refcount++; else if (!s->m_shared_texture) + { + DevCon.Warning("replace %d", m_source_memory_usage); m_source_memory_usage += s->m_texture->GetMemUsage(); + } } void GSTextureCache::IncAge() @@ -4544,7 +4580,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + m_target_memory_usage += dTex->GetMemUsage(); // copy the rt in const GSVector4i area(GSVector4i(x, y, x + w, y + h).rintersect(GSVector4i(sTex->GetSize()).zwxy())); @@ -4861,7 +4897,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con return nullptr; } - m_source_memory_usage += dTex->GetMemUsage(); + src->m_shared_texture = false; + src->m_target_direct = false; + m_target_memory_usage += dTex->GetMemUsage(); src->m_texture = dTex; if (use_texture) @@ -5316,7 +5354,7 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR Console.Error("Failed to allocate %dx%d merged dest texture", scaled_width, scaled_height); return nullptr; } - + DevCon.Warning("Merged %d", m_source_memory_usage); m_source_memory_usage += dtex->GetMemUsage(); // Sort rect list by the texture, we want to batch as many as possible together. @@ -6204,6 +6242,7 @@ GSTextureCache::Target::~Target() { // Targets should never be shared. pxAssert(!m_shared_texture); + if (m_texture) { g_texture_cache->m_target_memory_usage -= m_texture->GetMemUsage(); @@ -6504,7 +6543,11 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) m_valid = m_valid.rintersect(rect); m_drawn_since_read = m_drawn_since_read.rintersect(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } + // Else No valid size, so need to resize down. // GL_CACHE("ResizeValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); @@ -6517,12 +6560,16 @@ void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_res m_valid = rect; m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } else if (can_resize) { m_valid = m_valid.runion(rect); m_end_block = GSLocalMemory::GetEndBlockAddress(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM, m_valid); + const u32 offset = ((UnwrappedEndBlock() + 1) - m_TEX0.TBP0) % (std::max(m_TEX0.TBW, 1U) << 5); + m_end_block += offset; } // GL_CACHE("UpdateValidity (0x%x->0x%x) from R:%d,%d Valid: %d,%d", m_TEX0.TBP0, m_end_block, rect.z, rect.w, m_valid.z, m_valid.w); } @@ -6986,6 +7033,7 @@ void GSTextureCache::Palette::InitializeTexture() } m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); + g_texture_cache->m_source_memory_usage += m_tex_palette->GetMemUsage(); } }