Skip to content

Commit

Permalink
fix(parse): don't lookup non-parsable files in the cache (#6476)
Browse files Browse the repository at this point in the history
**Problem:**
Today we try to read every file from the cache, even if it's
non-parsable, and always get a cache miss (since we don't parse it and
thus not storing anything). We shouldn't try and access the cache for
non-parsable files at all.
<img width="366" alt="image"
src="https://github.com/user-attachments/assets/e7c4ec79-2c97-47b2-b272-bc7124795f4d">


**Fix:**
Ignore cache for non-parsable files.

**Commit Details:**
- Other than tidying the code, the only change is to also check for
`isParsableFile()` inside `shouldUseCacheForFile()`.
- As part of this change, since I now import `isParsableFile`, I also
moved `gitBlobChecksumFromBuffer` out of `assets.ts` and into
`file-utils.ts`. `file-utils` is in `shared` but was importing it from
the editor's `assets.ts`, and along with it the entire editor code,
unnecessarily.

**Manual Tests:**
I hereby swear that:

- [x] I opened a hydrogen project and it loaded
- [x] I could navigate to various routes in Play mode
  • Loading branch information
liady committed Dec 13, 2024
1 parent 1797131 commit d10e894
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 32 deletions.
25 changes: 2 additions & 23 deletions editor/src/components/assets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import { dropLeadingSlash } from './filebrowser/filepath-utils'
import { assertNever, fastForEach } from '../core/shared/utils'
import { mapValues, propOrNull } from '../core/shared/object-utils'
import { emptySet } from '../core/shared/set-utils'
import { sha1 } from 'sha.js'
import type { GithubFileChanges, TreeConflicts } from '../core/shared/github/helpers'
import type { FileChecksumsWithFile } from './editor/store/editor-state'
import { memoize, valueDependentCache } from '../core/shared/memoize'
Expand All @@ -34,6 +33,8 @@ import type {
} from 'utopia-shared/src/types/assets'
import { filtered, fromField, fromTypeGuard } from '../core/shared/optics/optic-creators'
import { anyBy, toArrayOf } from '../core/shared/optics/optic-utilities'
import { gitBlobChecksumFromBuffer } from '../core/shared/file-utils'

export type {
AssetFileWithFileName,
ProjectContentTreeRoot,
Expand All @@ -60,18 +61,6 @@ export function getAllProjectAssetFiles(
return allProjectAssets
}

function getSHA1ChecksumInner(contents: string | Buffer): string {
return new sha1().update(contents).digest('hex')
}

// Memoized because it can be called for the same piece of code more than once before the
// checksum gets cached. For example in the canvas strategies and the regular dispatch flow, which don't share
// those cached checksum objects.
export const getSHA1Checksum = memoize(getSHA1ChecksumInner, {
maxSize: 10,
matchesArg: (first, second) => first === second,
})

export function gitBlobChecksumFromBase64(base64: string): string {
return gitBlobChecksumFromBuffer(Buffer.from(base64, 'base64'))
}
Expand All @@ -80,16 +69,6 @@ export function gitBlobChecksumFromText(text: string): string {
return gitBlobChecksumFromBuffer(Buffer.from(text, 'utf8'))
}

export function gitBlobChecksumFromBuffer(buffer: Buffer): string {
// This function returns the same SHA1 checksum string that git would return for the same contents.
// Given the contents in the buffer variable, the final checksum is calculated by hashing
// a string built as "<prefix><contents>". The prefix looks like "blob <contents_length_in_bytes><null_character>".
// Ref: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
const prefix = Buffer.from(`blob ${buffer.byteLength}\0`)
const wrapped = Buffer.concat([prefix, buffer])
return getSHA1Checksum(wrapped)
}

export function checkFilesHaveSameContent(first: ProjectFile, second: ProjectFile): boolean {
switch (first.type) {
case 'DIRECTORY':
Expand Down
2 changes: 1 addition & 1 deletion editor/src/components/editor/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import type { LoginState } from '../../uuiui-deps'
import urljoin from 'url-join'
import JSZip from 'jszip'
import type { AssetFileWithFileName } from '../assets'
import { gitBlobChecksumFromBuffer } from '../assets'
import { gitBlobChecksumFromBuffer } from '../../core/shared/file-utils'
import { isLoginLost } from '../../common/user'
import { notice } from '../common/notice'
import type { EditorDispatch } from './action-types'
Expand Down
2 changes: 1 addition & 1 deletion editor/src/core/model/project-import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
import { fileTypeFromFileName } from './project-file-utils'
import { assetResultForBase64, getFileExtension, imageResultForBase64 } from '../shared/file-utils'
import type { ProjectContentTreeRoot } from '../../components/assets'
import { gitBlobChecksumFromBuffer } from '../../components/assets'
import { gitBlobChecksumFromBuffer } from '../shared/file-utils'
import { addFileToProjectContents, walkContentsTreeAsync } from '../../components/assets'
import type { Either } from '../shared/either'
import { isRight, left, right } from '../shared/either'
Expand Down
25 changes: 24 additions & 1 deletion editor/src/core/shared/file-utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { gitBlobChecksumFromBuffer } from '../../components/assets'
import { memoize } from './memoize'
import { sha1 } from 'sha.js'
import stringHash from 'string-hash'
import type { Size } from './math-utils'
import { size } from './math-utils'
Expand Down Expand Up @@ -263,3 +264,25 @@ export function isJsOrTsFile(filename: string): boolean {
export function isParseableFile(filename: string): boolean {
return isJsOrTsFile(filename)
}

export function gitBlobChecksumFromBuffer(buffer: Buffer): string {
// This function returns the same SHA1 checksum string that git would return for the same contents.
// Given the contents in the buffer variable, the final checksum is calculated by hashing
// a string built as "<prefix><contents>". The prefix looks like "blob <contents_length_in_bytes><null_character>".
// Ref: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects
const prefix = Buffer.from(`blob ${buffer.byteLength}\0`)
const wrapped = Buffer.concat([prefix, buffer])
return getSHA1Checksum(wrapped)
}

function getSHA1ChecksumInner(contents: string | Buffer): string {
return new sha1().update(contents).digest('hex')
}

// Memoized because it can be called for the same piece of code more than once before the
// checksum gets cached. For example in the canvas strategies and the regular dispatch flow, which don't share
// those cached checksum objects.
export const getSHA1Checksum = memoize(getSHA1ChecksumInner, {
maxSize: 10,
matchesArg: (first, second) => first === second,
})
20 changes: 14 additions & 6 deletions editor/src/core/workers/parser-printer/parse-cache-utils.worker.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { isParseableFile } from '../../../core/shared/file-utils'
import { URL_HASH } from '../../../common/env-vars'
import { type ParseCacheOptions } from '../../../core/shared/parse-cache-utils'
import {
Expand Down Expand Up @@ -33,15 +34,22 @@ function logCacheMessage(parsingCacheOptions: ParseCacheOptions, ...messages: (s
}
}

function isArbitraryCodeFile(filename: string): boolean {
return filename === ARBITRARY_CODE_FILE_NAME
}

function stringIdentifiers(filename: string, content: string): (string | object)[] {
if (filename === ARBITRARY_CODE_FILE_NAME) {
if (isArbitraryCodeFile(filename)) {
return ['code', [{ content: content }]]
}
return [filename]
}

function shouldSkipCacheForFile(filename: string, parsingCacheOptions: ParseCacheOptions): boolean {
return filename === ARBITRARY_CODE_FILE_NAME && !parsingCacheOptions.cacheArbitraryCode
function shouldUseCacheForFile(filename: string, parsingCacheOptions: ParseCacheOptions): boolean {
return (
isParseableFile(filename) &&
(!isArbitraryCodeFile(filename) || parsingCacheOptions.cacheArbitraryCode)
)
}

export function getParseCacheVersion(): string {
Expand All @@ -59,7 +67,7 @@ export async function getParseResultFromCache(
parsingCacheOptions: ParseCacheOptions,
): Promise<ParseFileResult | null> {
const { filename, content } = file
if (shouldSkipCacheForFile(filename, parsingCacheOptions)) {
if (!shouldUseCacheForFile(filename, parsingCacheOptions)) {
return null
}
const cacheKey = getCacheKey(filename)
Expand All @@ -80,12 +88,12 @@ export async function storeParseResultInCache(
parsingCacheOptions: ParseCacheOptions,
): Promise<void> {
const { filename, content } = file
if (shouldSkipCacheForFile(filename, parsingCacheOptions)) {
if (!shouldUseCacheForFile(filename, parsingCacheOptions)) {
return
}
logCacheMessage(parsingCacheOptions, 'Caching', ...stringIdentifiers(filename, content))
const cacheKey = getCacheKey(filename)
if (filename === ARBITRARY_CODE_FILE_NAME) {
if (isArbitraryCodeFile(filename)) {
// for the special filename 'code.tsx', we store multiple contents, so we need to read it first
const cachedResult = (await getParseCacheStore().getItem<CachedParseResult>(cacheKey)) ?? {}
// limit the arbitrary code cache keys size
Expand Down

0 comments on commit d10e894

Please sign in to comment.