Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Image Scraping #5562

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions graphql/schema/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ type Query {
input: ScrapeSingleGroupInput!
): [ScrapedGroup!]!

"Scrape for a single image"
scrapeSingleImage(
source: ScraperSourceInput!
input: ScrapeSingleImageInput!
): [ScrapedImage!]!

"Scrapes content based on a URL"
scrapeURL(url: String!, ty: ScrapeContentType!): ScrapedContent

Expand All @@ -182,6 +188,8 @@ type Query {
scrapeSceneURL(url: String!): ScrapedScene
"Scrapes a complete gallery record based on a URL"
scrapeGalleryURL(url: String!): ScrapedGallery
"Scrapes a complete image record based on a URL"
scrapeImageURL(url: String!): ScrapedImage
"Scrapes a complete movie record based on a URL"
scrapeMovieURL(url: String!): ScrapedMovie
@deprecated(reason: "Use scrapeGroupURL instead")
Expand Down
33 changes: 33 additions & 0 deletions graphql/schema/types/scraper.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ enum ScrapeType {
"Type of the content a scraper generates"
enum ScrapeContentType {
GALLERY
IMAGE
MOVIE
GROUP
PERFORMER
Expand All @@ -22,6 +23,7 @@ union ScrapedContent =
| ScrapedTag
| ScrapedScene
| ScrapedGallery
| ScrapedImage
| ScrapedMovie
| ScrapedGroup
| ScrapedPerformer
Expand All @@ -41,6 +43,8 @@ type Scraper {
scene: ScraperSpec
"Details for gallery scraper"
gallery: ScraperSpec
"Details for image scraper"
image: ScraperSpec
"Details for movie scraper"
movie: ScraperSpec @deprecated(reason: "use group")
"Details for group scraper"
Expand Down Expand Up @@ -128,6 +132,26 @@ input ScrapedGalleryInput {
# no studio, tags or performers
}

type ScrapedImage {
title: String
code: String
details: String
photographer: String
urls: [String!]
date: String
studio: ScrapedStudio
tags: [ScrapedTag!]
performers: [ScrapedPerformer!]
}

input ScrapedImageInput {
title: String
code: String
details: String
urls: [String!]
date: String
}

input ScraperSourceInput {
"Index of the configured stash-box instance to use. Should be unset if scraper_id is set"
stash_box_index: Int @deprecated(reason: "use stash_box_endpoint")
Expand Down Expand Up @@ -190,6 +214,15 @@ input ScrapeSingleGalleryInput {
gallery_input: ScrapedGalleryInput
}

input ScrapeSingleImageInput {
"Instructs to query by string"
query: String
"Instructs to query by image id"
image_id: ID
"Instructs to query by image fragment"
image_input: ScrapedImageInput
}

input ScrapeSingleMovieInput {
"Instructs to query by string"
query: String
Expand Down
6 changes: 3 additions & 3 deletions internal/api/resolver_mutation_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func (r *mutationResolver) getImage(ctx context.Context, id int) (ret *models.Im
return ret, nil
}

func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInput) (ret *models.Image, err error) {
func (r *mutationResolver) ImageUpdate(ctx context.Context, input models.ImageUpdateInput) (ret *models.Image, err error) {
translator := changesetTranslator{
inputMap: getUpdateInputMap(ctx),
}
Expand All @@ -46,7 +46,7 @@ func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInp
return r.getImage(ctx, ret.ID)
}

func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdateInput) (ret []*models.Image, err error) {
func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*models.ImageUpdateInput) (ret []*models.Image, err error) {
inputMaps := getUpdateInputMaps(ctx)

// Start the transaction and save the image
Expand Down Expand Up @@ -89,7 +89,7 @@ func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdat
return newRet, nil
}

func (r *mutationResolver) imageUpdate(ctx context.Context, input ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
func (r *mutationResolver) imageUpdate(ctx context.Context, input models.ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
imageID, err := strconv.Atoi(input.ID)
if err != nil {
return nil, fmt.Errorf("converting id: %w", err)
Expand Down
42 changes: 42 additions & 0 deletions internal/api/resolver_query_scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,15 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra
return ret, nil
}

func (r *queryResolver) ScrapeImageURL(ctx context.Context, url string) (*scraper.ScrapedImage, error) {
content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeImage)
if err != nil {
return nil, err
}

return marshalScrapedImage(content)
}

func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeMovie)
if err != nil {
Expand Down Expand Up @@ -491,6 +500,39 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.
return ret, nil
}

func (r *queryResolver) ScrapeSingleImage(ctx context.Context, source scraper.Source, input ScrapeSingleImageInput) ([]*scraper.ScrapedImage, error) {
if source.StashBoxIndex != nil {
return nil, ErrNotSupported
}

if source.ScraperID == nil {
return nil, fmt.Errorf("%w: scraper_id must be set", ErrInput)
}

var c scraper.ScrapedContent

switch {
case input.ImageID != nil:
imageID, err := strconv.Atoi(*input.ImageID)
if err != nil {
return nil, fmt.Errorf("%w: image id is not an integer: '%s'", ErrInput, *input.ImageID)
}
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, imageID, scraper.ScrapeContentTypeImage)
if err != nil {
return nil, err
}
return marshalScrapedImages([]scraper.ScrapedContent{c})
case input.ImageInput != nil:
c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Image: input.ImageInput})
if err != nil {
return nil, err
}
return marshalScrapedImages([]scraper.ScrapedContent{c})
default:
return nil, ErrNotImplemented
}
}

func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {
return nil, ErrNotSupported
}
Expand Down
31 changes: 31 additions & 0 deletions internal/api/scraped_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ func marshalScrapedGalleries(content []scraper.ScrapedContent) ([]*scraper.Scrap
return ret, nil
}

func marshalScrapedImages(content []scraper.ScrapedContent) ([]*scraper.ScrapedImage, error) {
var ret []*scraper.ScrapedImage
for _, c := range content {
if c == nil {
// graphql schema requires images to be non-nil
continue
}

switch g := c.(type) {
case *scraper.ScrapedImage:
ret = append(ret, g)
case scraper.ScrapedImage:
ret = append(ret, &g)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedImage", models.ErrConversion)
}
}

return ret, nil
}

// marshalScrapedMovies converts ScrapedContent into ScrapedMovie. If conversion
// fails, an error is returned.
func marshalScrapedMovies(content []scraper.ScrapedContent) ([]*models.ScrapedMovie, error) {
Expand Down Expand Up @@ -129,6 +150,16 @@ func marshalScrapedGallery(content scraper.ScrapedContent) (*scraper.ScrapedGall
return g[0], nil
}

// marshalScrapedImage will marshal a single scraped image
func marshalScrapedImage(content scraper.ScrapedContent) (*scraper.ScrapedImage, error) {
g, err := marshalScrapedImages([]scraper.ScrapedContent{content})
if err != nil {
return nil, err
}

return g[0], nil
}

// marshalScrapedMovie will marshal a single scraped movie
func marshalScrapedMovie(content scraper.ScrapedContent) (*models.ScrapedMovie, error) {
m, err := marshalScrapedMovies([]scraper.ScrapedContent{content})
Expand Down
22 changes: 22 additions & 0 deletions pkg/models/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,28 @@ type ImageFilterType struct {
UpdatedAt *TimestampCriterionInput `json:"updated_at"`
}

type ImageUpdateInput struct {
ClientMutationID *string `json:"clientMutationId"`
ID string `json:"id"`
Title *string `json:"title"`
Code *string `json:"code"`
Urls []string `json:"urls"`
Date *string `json:"date"`
Details *string `json:"details"`
Photographer *string `json:"photographer"`
Rating100 *int `json:"rating100"`
Organized *bool `json:"organized"`
SceneIds []string `json:"scene_ids"`
StudioID *string `json:"studio_id"`
TagIds []string `json:"tag_ids"`
PerformerIds []string `json:"performer_ids"`
GalleryIds []string `json:"gallery_ids"`
PrimaryFileID *string `json:"primary_file_id"`

// deprecated
URL *string `json:"url"`
}

type ImageDestroyInput struct {
ID string `json:"id"`
DeleteFile *bool `json:"delete_file"`
Expand Down
1 change: 1 addition & 0 deletions pkg/scraper/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type scraperActionImpl interface {

scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*ScrapedScene, error)
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*ScrapedGallery, error)
scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error)
}

func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {
Expand Down
58 changes: 58 additions & 0 deletions pkg/scraper/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,18 @@ type GalleryFinder interface {
models.URLLoader
}

type ImageFinder interface {
models.ImageGetter
models.FileLoader
models.URLLoader
}

type Repository struct {
TxnManager models.TxnManager

SceneFinder SceneFinder
GalleryFinder GalleryFinder
ImageFinder ImageFinder
TagFinder TagFinder
PerformerFinder PerformerFinder
GroupFinder match.GroupNamesFinder
Expand All @@ -93,6 +100,7 @@ func NewRepository(repo models.Repository) Repository {
TxnManager: repo.TxnManager,
SceneFinder: repo.Scene,
GalleryFinder: repo.Gallery,
ImageFinder: repo.Image,
TagFinder: repo.Tag,
PerformerFinder: repo.Performer,
GroupFinder: repo.Group,
Expand Down Expand Up @@ -357,6 +365,28 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty Scrape
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}

case ScrapeContentTypeImage:
is, ok := s.(imageScraper)
if !ok {
return nil, fmt.Errorf("%w: cannot use scraper %s as a image scraper", ErrNotSupported, scraperID)
}

scene, err := c.getImage(ctx, id)
if err != nil {
return nil, fmt.Errorf("scraper %s: unable to load image id %v: %w", scraperID, id, err)
}

// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := is.viaImage(ctx, c.client, scene)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}
Expand Down Expand Up @@ -426,3 +456,31 @@ func (c Cache) getGallery(ctx context.Context, galleryID int) (*models.Gallery,
}
return ret, nil
}

func (c Cache) getImage(ctx context.Context, imageID int) (*models.Image, error) {
var ret *models.Image
r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
qb := r.ImageFinder

var err error
ret, err = qb.Find(ctx, imageID)
if err != nil {
return err
}

if ret == nil {
return fmt.Errorf("gallery with id %d not found", imageID)
}

err = ret.LoadFiles(ctx, qb)
if err != nil {
return err
}

return ret.LoadURLs(ctx, qb)
}); err != nil {
return nil, err
}
return ret, nil
}
Loading
Loading