Skip to content

Commit

Permalink
Merge pull request #385 from hearchco/as/fix/images-bing
Browse files Browse the repository at this point in the history
fix(engines): refactor images into separate interface, FIXED BING IMAGES, updated user agent
  • Loading branch information
aleksasiriski authored Aug 30, 2024
2 parents 14ed63c + 71c8542 commit 9716d2e
Show file tree
Hide file tree
Showing 43 changed files with 501 additions and 260 deletions.
33 changes: 21 additions & 12 deletions generate/enginer/enginer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@ import (
)

// Command-line flags for the generator. Each flag's help string states its
// default value.
var (
// NOTE(review): the declarations in this block appear twice; this looks like
// the before and after sides of the diff hunk shown one after the other. The
// second group is the first plus interfaceImageSearcher.
typeName = flag.String("type", "", "type name; must be set")
output = flag.String("output", "", "output file name; default srcdir/<type>_enginer.go")
trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names")
buildTags = flag.String("tags", "", "comma-separated list of build tags to apply")
packageName = flag.String("packagename", "", "name of the package for generated code; default current package")
interfacesImport = flag.String("interfacesimport", "github.com/hearchco/agent/src/search/scraper", "source of the interface import, which is prefixed to interfaces; default github.com/hearchco/agent/src/search/scraper")
interfacesPackage = flag.String("interfacespackage", "scraper", "name of the package for the interfaces; default scraper")
interfaceEnginer = flag.String("interfaceenginer", "Enginer", "name of the nginer interface; default scraper.Enginer")
interfaceSearcher = flag.String("interfacesearcher", "Searcher", "name of the searcher interface; default scraper.Searcher")
interfaceSuggester = flag.String("interfacesuggester", "Suggester", "name of the suggester interface; default scraper.Suggester")
enginesImport = flag.String("enginesimport", "github.com/hearchco/agent/src/search/engines", "source of the engines import, which is prefixed to imports for engines; default github.com/hearchco/agent/src/search/engines")
typeName = flag.String("type", "", "type name; must be set")
output = flag.String("output", "", "output file name; default srcdir/<type>_enginer.go")
trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names")
buildTags = flag.String("tags", "", "comma-separated list of build tags to apply")
packageName = flag.String("packagename", "", "name of the package for generated code; default current package")
interfacesImport = flag.String("interfacesimport", "github.com/hearchco/agent/src/search/scraper", "source of the interface import, which is prefixed to interfaces; default github.com/hearchco/agent/src/search/scraper")
interfacesPackage = flag.String("interfacespackage", "scraper", "name of the package for the interfaces; default scraper")
// NOTE(review): the help text reads "nginer"; this is probably a typo for
// "enginer".
interfaceEnginer = flag.String("interfaceenginer", "Enginer", "name of the nginer interface; default scraper.Enginer")
interfaceSearcher = flag.String("interfacesearcher", "Searcher", "name of the searcher interface; default scraper.Searcher")
// NOTE(review): the help text says "searcher interface" although this flag
// names the image searcher interface; it looks copied from the flag above.
interfaceImageSearcher = flag.String("interfaceimagesearcher", "ImageSearcher", "name of the searcher interface; default scraper.ImageSearcher")
interfaceSuggester = flag.String("interfacesuggester", "Suggester", "name of the suggester interface; default scraper.Suggester")
enginesImport = flag.String("enginesimport", "github.com/hearchco/agent/src/search/engines", "source of the engines import, which is prefixed to imports for engines; default github.com/hearchco/agent/src/search/engines")
)

// Usage is a replacement usage function for the flags package.
Expand Down Expand Up @@ -201,6 +202,7 @@ func (g *Generator) generate(typeName string) {
g.printEnginerLen(values)
g.printInterfaces(values, *interfaceEnginer)
g.printInterfaces(values, *interfaceSearcher)
g.printInterfaces(values, *interfaceImageSearcher)
g.printInterfaces(values, *interfaceSuggester)
}

Expand Down Expand Up @@ -327,7 +329,7 @@ func (g *Generator) printEnginerLen(values []Value) {

func (g *Generator) printInterfaces(values []Value, interfaceName string) {
g.Printf("\n")
g.Printf("\nfunc %sArray() [enginerLen]%s.%s {", strings.ToLower(interfaceName), *interfacesPackage, interfaceName)
g.Printf("\nfunc %sArray() [enginerLen]%s.%s {", toLowerFirstChar(interfaceName), *interfacesPackage, interfaceName)
g.Printf("\n\tvar engineArray [enginerLen]%s.%s", *interfacesPackage, interfaceName)
for _, v := range values {
if validConst(v) && validInterfacer(v, interfaceName) {
Expand All @@ -337,3 +339,10 @@ func (g *Generator) printInterfaces(values []Value, interfaceName string) {
g.Printf("\n\treturn engineArray")
g.Printf("\n}")
}

// toLowerFirstChar returns s with its first character converted to lower case
// and the remainder left untouched. An empty string is returned unchanged.
//
// The first character is treated as a whole rune rather than a single byte, so
// a string that starts with a multi-byte UTF-8 character (for example "É") is
// lower-cased correctly instead of being split in the middle of the rune.
func toLowerFirstChar(s string) string {
	// Ranging over a string yields the byte offset at which each rune starts;
	// the first offset greater than zero is therefore the end of the first rune.
	for i := range s {
		if i > 0 {
			return strings.ToLower(s[:i]) + s[i:]
		}
	}
	// Empty string or a single rune: lower-casing the whole string is enough.
	return strings.ToLower(s)
}
8 changes: 4 additions & 4 deletions src/config/defaults_cat_images.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ import (
)

// imagesEngines lists engine names; presumably the engines used for the images
// category — confirm against the code that consumes this slice.
var imagesEngines = []engines.Name{
engines.BINGIMAGES,
engines.GOOGLEIMAGES,
engines.BING,
engines.GOOGLE,
}

// imagesRequiredEngines is an empty list of engine names; presumably no engine
// is strictly required for the images category — confirm against the consumer.
var imagesRequiredEngines = []engines.Name{}

// imagesRequiredByOriginEngines lists engine names; presumably the engines
// treated as "required by origin" for the images category — confirm the exact
// meaning against the code that consumes this slice.
var imagesRequiredByOriginEngines = []engines.Name{
engines.BINGIMAGES,
engines.GOOGLEIMAGES,
engines.BING,
engines.GOOGLE,
}

// imagesPreferredEngines is an empty list of engine names; presumably no engine
// is preferred for the images category — confirm against the consumer.
var imagesPreferredEngines = []engines.Name{}
Expand Down
9 changes: 8 additions & 1 deletion src/router/routes/route_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,14 @@ func routeSearch(w http.ResponseWriter, r *http.Request, ver string, catsConf ma
}

// Search for results.
scrapedRes, err := search.Search(query, categoryName, opts, catsConf[categoryName])
var scrapedRes []result.Result
switch categoryName {
case category.IMAGES:
scrapedRes, err = search.ImageSearch(query, opts, catsConf[categoryName])
default:
scrapedRes, err = search.Search(query, categoryName, opts, catsConf[categoryName])
}

if err != nil {
// Server error.
werr := writeResponseJSON(w, http.StatusInternalServerError, ErrorResponse{
Expand Down
86 changes: 86 additions & 0 deletions src/search/engines/_engines_test/imagesearch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package _engines_test

import (
"context"
"strings"
"testing"

"github.com/hearchco/agent/src/search/result"
"github.com/hearchco/agent/src/search/scraper"
)

// CheckImageSearch runs three groups of test cases against the image searcher
// e and reports every failure on t without stopping at the first one.
//
//   - tchar: each query must produce at least one result.
//   - tccr: each query must produce results, and every URL fragment in
//     ResultURLs must be contained in the URL of at least one result.
//   - tcrr: each query must produce at least len(ResultURLs) results, and the
//     i-th URL fragment must be contained in the URL of the i-th result.
//
// The searcher is re-initialised with a background context before every query.
func CheckImageSearch(t *testing.T, e scraper.ImageSearcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) {
	// collect re-initialises the searcher, starts run in its own goroutine and
	// gathers everything sent on the channel until the channel is closed.
	// NOTE(review): this relies on ImageSearch closing the channel when it is
	// done; otherwise the range below never terminates — confirm.
	collect := func(run func(chan result.ResultScraped)) []result.ResultScraped {
		e.ReInitSearcher(context.Background())

		resChan := make(chan result.ResultScraped, 100)
		go run(resChan)

		results := make([]result.ResultScraped, 0)
		for r := range resChan {
			results = append(results, r)
		}
		return results
	}

	// TestCaseHasAnyResults
	for _, tc := range tchar {
		results := collect(func(ch chan result.ResultScraped) {
			e.ImageSearch(tc.Query, tc.Options, ch)
		})

		if len(results) == 0 {
			t.Errorf("Got no results for %q", tc.Query)
		}
	}

	// TestCaseContainsResults
	for _, tc := range tccr {
		results := collect(func(ch chan result.ResultScraped) {
			e.ImageSearch(tc.Query, tc.Options, ch)
		})

		if len(results) == 0 {
			t.Errorf("Got no results for %q", tc.Query)
			continue
		}

		for _, rURL := range tc.ResultURLs {
			found := false
			for _, r := range results {
				if strings.Contains(r.URL(), rURL) {
					found = true
					break
				}
			}

			if !found {
				t.Errorf("Couldn't find %q (%q).\nThe results: %q", rURL, tc.Query, results)
			}
		}
	}

	// TestCaseRankedResults
	for _, tc := range tcrr {
		results := collect(func(ch chan result.ResultScraped) {
			e.ImageSearch(tc.Query, tc.Options, ch)
		})

		switch {
		case len(results) == 0:
			t.Errorf("Got no results for %q", tc.Query)
		case len(results) < len(tc.ResultURLs):
			t.Errorf("Number of results is less than test case URLs.")
		default:
			for i, rURL := range tc.ResultURLs {
				if !strings.Contains(results[i].URL(), rURL) {
					// The rank is an integer, so it is printed with %d; %q
					// would render it as a quoted character literal.
					t.Errorf("Wrong result on rank %d: %q (%q).\nThe results: %q", i+1, rURL, tc.Query, results)
				}
			}
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/hearchco/agent/src/search/scraper"
)

func CheckTestCases(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) {
func CheckSearch(t *testing.T, e scraper.Searcher, tchar []TestCaseHasAnyResults, tccr []TestCaseContainsResults, tcrr []TestCaseRankedResults) {
// TestCaseHasAnyResults
for _, tc := range tchar {
e.ReInitSearcher(context.Background())
Expand Down
49 changes: 49 additions & 0 deletions src/search/engines/bing/dompaths.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,52 @@ var dompaths = scraper.DOMPaths{
Title: "h2 > a",
Description: "div.b_caption",
}

// thumbnailDomPaths groups the strings used to locate one thumbnail variant.
// Judging by the values assigned in imgDompaths, Path is presumably a CSS
// selector and Height/Width are the names of the attributes that hold the
// thumbnail dimensions — confirm against the scraping code.
type thumbnailDomPaths struct {
	Path          string
	Height, Width string
}

// metadataDomPaths pairs a Path with an Attr. Judging by the values assigned
// in imgDompaths ("a.iusc" and "m"), Path is presumably a CSS selector and
// Attr the attribute read from the matched element — confirm against the
// scraping code.
type metadataDomPaths struct {
	Path, Attr string
}

// bingImagesDomPaths collects the strings used to pick fields out of one image
// result. The values assigned in imgDompaths look like CSS selectors;
// presumably each field locates the correspondingly named piece of a result —
// confirm against the scraping code.
type bingImagesDomPaths struct {
	Result              string
	Metadata            metadataDomPaths
	Title, ImgFormatStr string
	// Thumbnail holds three alternative thumbnail locations.
	Thumbnail [3]thumbnailDomPaths
	Source    string
}

// imgDompaths holds the concrete selector strings for image results, including
// three alternative thumbnail layouts.
var imgDompaths = bingImagesDomPaths{
	// aria-live is also a possible attribute for not()
	Result:       "ul.dgControl_list > li[data-idx] > div.iuscp:not([vrhatt])",
	Metadata:     metadataDomPaths{Path: "a.iusc", Attr: "m"},
	Title:        "div.infnmpt > div > ul > li > a",
	ImgFormatStr: "div.imgpt > div > span",
	Thumbnail: [3]thumbnailDomPaths{
		{Path: "a.iusc > div > img.mimg", Height: "height", Width: "width"},
		{Path: "a.iusc > div > div > div.mimg > div", Height: "data-height", Width: "data-width"},
		{Path: "a.iusc > div > div > div.mimg > img", Height: "height", Width: "width"},
	},
	Source: "div.imgpt > div.img_info > div.lnkw > a",
}
Loading

0 comments on commit 9716d2e

Please sign in to comment.