Skip to content

Commit

Permalink
Collects google ads/answers + captcha solver test
Browse files Browse the repository at this point in the history
  • Loading branch information
karust committed May 5, 2024
1 parent 482f725 commit 0a3c07b
Show file tree
Hide file tree
Showing 13 changed files with 283 additions and 92 deletions.
21 changes: 7 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ docker-compose up --build
| file | File extension to search (e.g. `PDF`, `DOC`) |
| site | Search within a specific website |
| limit | Limit the number of results |
| answers | Include Google answers in the results with negative rank indexes (e.g. `true`, `false`) |

### **Search**
### *Example request*
Expand All @@ -47,27 +48,18 @@ You can replace `google` to `yandex` or `baidu` in query to change search engine
"rank": 1,
"url": "https://en.wikipedia.org/wiki/%22Hello,_World!%22_program",
"title": "\"Hello, World!\" program",
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\"."
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\".",
"ad": false
},
]
```
### **Images**
### **Images** **[WIP]**
### *Example request*
Get 100 **Google** results for `golden puppy`:
```
GET http://127.0.0.1:7000/google/image?text=golden puppy&limit=100
```
### *Example response*
```JSON
[
{
"rank": 1,
"url": "https://en.wikipedia.org/wiki/%22Hello,_World!%22_program",
"title": "\"Hello, World!\" program",
"description": "A \"Hello, World!\" program is generally a computer program that ignores any input, and outputs or displays a message similar to \"Hello, World!\"."
},
]
```


## CLI <a name="cli"></a> ⌨️
* Use `-h` flag to see commands.
Expand All @@ -86,7 +78,8 @@ As a result you should get JSON output containing search results:
"rank": 1,
"url": "https://www.cyberoptik.net/blog/6-sure-fire-ways-to-get-banned-from-google/",
"title": "11 Sure-Fire Ways to Get Banned From Google | CyberOptik",
"description": "How To Get Banned From Google · 1. Cloaking: The Art of Deception · 2. Plagiarism: Because Originality is Overrated · 3. Keyword Stuffing: More is Always Better · 4 ..."
"description": "How To Get Banned From Google · 1. Cloaking: The Art of Deception · 2. Plagiarism: Because Originality is Overrated · 3. Keyword Stuffing: More is Always Better · 4 ...",
"ad": false
},
]
```
Expand Down
21 changes: 13 additions & 8 deletions cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package cmd

import (
"errors"
"fmt"
"strings"

Expand All @@ -13,16 +12,21 @@ import (
)

const (
version = "0.2.1"
version = "0.3"
defaultConfigFilename = "config"
envPrefix = "OPENSERP"
)

type Config struct {
App AppConfig `mapstructure:"app"`
GoogleConfig core.SearchEngineOptions `mapstructure:"google"`
YandexConfig core.SearchEngineOptions `mapstructure:"yandex"`
BaiduConfig core.SearchEngineOptions `mapstructure:"baidu"`
App AppConfig `mapstructure:"app"`
Config2Capcha Config2Captcha `mapstructure:"2captcha"`
GoogleConfig core.SearchEngineOptions `mapstructure:"google"`
YandexConfig core.SearchEngineOptions `mapstructure:"yandex"`
BaiduConfig core.SearchEngineOptions `mapstructure:"baidu"`
}

// Config2Captcha holds the settings for the 2captcha service.
type Config2Captcha struct {
// ApiKey is the 2captcha API key, mapped from the `apikey` config key.
ApiKey string `mapstructure:"apikey"`
}

type AppConfig struct {
Expand Down Expand Up @@ -88,7 +92,7 @@ func initializeConfig(cmd *cobra.Command) error {
// 1. Config. Return an error if we cannot parse the config file.
err := v.ReadInConfig()
if err != nil {
err = errors.New(fmt.Sprintf("Cannot read config: %v", err))
err = fmt.Errorf("cannot read config: %v", err)
logrus.Warn(err)
}

Expand All @@ -107,7 +111,7 @@ func initializeConfig(cmd *cobra.Command) error {
// Dump Viper values to config struct
err = v.Unmarshal(&config)
if err != nil {
return errors.New(fmt.Sprintf("Cannot unmarshall config: %v", err))
return fmt.Errorf("cannot unmarshall config: %v", err)
}

if config.App.IsDebug {
Expand All @@ -128,4 +132,5 @@ func init() {
RootCmd.PersistentFlags().BoolVarP(&config.App.IsLeakless, "leakless", "l", false, "Use leakless mode to insure browser instances are closed after search")
RootCmd.PersistentFlags().BoolVarP(&config.App.IsRawRequests, "raw", "r", false, "Disable browser usage, use HTTP requests")
RootCmd.PersistentFlags().BoolVarP(&config.App.IsLeaveHead, "leave", "", false, "Leave browser and tabs opened after search is made")
RootCmd.PersistentFlags().StringVarP(&config.Config2Capcha.ApiKey, "2captcha_key", "", "", "2 captcha api key")
}
9 changes: 5 additions & 4 deletions cmd/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ func searchBrowser(engineType string, query core.Query) ([]core.SearchResult, er
var engine core.SearchEngine

opts := core.BrowserOpts{
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
IsLeakless: config.App.IsLeakless,
Timeout: time.Second * time.Duration(config.App.Timeout),
LeavePageOpen: config.App.IsLeaveHead,
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
IsLeakless: config.App.IsLeakless,
Timeout: time.Second * time.Duration(config.App.Timeout),
LeavePageOpen: config.App.IsLeaveHead,
CaptchaSolverApiKey: config.Config2Capcha.ApiKey,
}

if config.App.IsDebug {
Expand Down
9 changes: 5 additions & 4 deletions cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ var serveCMD = &cobra.Command{

func serve(cmd *cobra.Command, args []string) {
opts := core.BrowserOpts{
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
IsLeakless: config.App.IsLeakless,
Timeout: time.Second * time.Duration(config.App.Timeout),
LeavePageOpen: config.App.IsLeaveHead,
IsHeadless: !config.App.IsBrowserHead, // Disable headless if browser head mode is set
IsLeakless: config.App.IsLeakless,
Timeout: time.Second * time.Duration(config.App.Timeout),
LeavePageOpen: config.App.IsLeaveHead,
CaptchaSolverApiKey: config.Config2Capcha.ApiKey,
}

if config.App.IsDebug {
Expand Down
17 changes: 11 additions & 6 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,20 @@ app:
timeout: 15
head: false
leakless: false
leave_head: false

2captcha:
apikey: "123123123123123"

google:
rate_requests: 4 # Number of requests per Minute
rate_burst: 2 # Number of non-ratelimited requests per Minute
rate_requests: 4 # Number of requests per Minute
rate_burst: 2 # Number of non-ratelimited requests per Minute
captcha: true

yandex:
rate_requests: 4
rate_burst: 2
rate_requests: 4
rate_burst: 2

baidu:
rate_requests: 4
rate_burst: 2
rate_requests: 4
rate_burst: 2
28 changes: 28 additions & 0 deletions core/captcha.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package core

import (
api2captcha "github.com/2captcha/2captcha-go"
)

// CaptchaSolver wraps a 2captcha API client used to solve captchas.
type CaptchaSolver struct {
// client is the underlying 2captcha client; it is set by NewSolver.
client *api2captcha.Client
}

// NewSolver returns a CaptchaSolver whose 2captcha client is created
// with the given API key.
func NewSolver(apikey string) *CaptchaSolver {
	return &CaptchaSolver{
		client: api2captcha.NewClient(apikey),
	}
}

// SolveReCaptcha2 submits a visible reCAPTCHA v2 challenge to 2captcha and
// returns whatever the client's Solve call yields.
//
// sitekey is the reCAPTCHA site key, pageUrl is the URL of the page showing
// the captcha, and dataS is passed through as the request's DataS field.
//
// No proxy is attached to the request: the previous implementation sent the
// library's documentation placeholder ("login:password@IP_address:PORT"),
// which is not a usable proxy address.
func (cs *CaptchaSolver) SolveReCaptcha2(sitekey, pageUrl, dataS string) (string, error) {
	// Named "challenge" rather than "cap" to avoid shadowing the builtin cap.
	challenge := api2captcha.ReCaptcha{
		SiteKey:   sitekey,
		Url:       pageUrl,
		DataS:     dataS,
		Invisible: false,
		Action:    "verify",
	}
	return cs.client.Solve(challenge.ToRequest())
}
20 changes: 20 additions & 0 deletions core/captcha_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package core

import (
"testing"
)

var (
// API_KEY is the 2captcha API key that Test2Captcha passes to NewSolver.
// It is empty by default.
API_KEY = ""
)

func Test2Captcha(t *testing.T) {
solver := NewSolver(API_KEY)
sitekey := "6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b"
url := "https://www.google.com/sorry/index?continue=https://www.google.de/search%3Fhl%3DDE%26lr%3Dlang_de%26nfpr%3D1%26num%3D500%26pws%3D0%26q%3Dwhere%2Bwhy%2Beach&hl=DE&q=EgRegw55GObHiq4GIjDqmzFKayGXrS2-s9ooWfcskhpK8-6tIjWSaSvhxd3f5eAyUXj7lYq2DYLDXB8ASz0yAXJaAUM"
datas := "Ghk0n7ZQNDS0c7ES53eef_YBfSdfeXnyRD0p2OR0R4Dg91CUXKS_hio5Do6TpJ8sHhhOat_NymTASZGe1gqAjP7w9dSvhvRT7QXsrdziO3JPngLDSRzDdjT42GDcSbO0kzInlDPxe1yy2t4yifo9xHpMnlZU7pTVNTQUIXqOMLHAR-iERi6aoSQDQ4d-88-jW3LEinquxEut0OhHG2l2stwG9AnCmNvCsUNJda-H24saFlOh5csK9KNXeeQmpr6at52_skMIMiLXSlY56vYFVCRMkXLQdAM"
resp, err := solver.SolveReCaptcha2(sitekey, url, datas)
if err != nil || resp == "" {
t.Fatalf("Failed to solve recaptchaV2: %s", err)
}
}
31 changes: 19 additions & 12 deletions core/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ import (
"github.com/gofiber/fiber/v2"
)

var ErrCaptcha = errors.New("Captcha detected")
var ErrSearchTimeout = errors.New("Timeout. Cannot find element on page")
var ErrCaptcha = errors.New("captcha detected")
var ErrSearchTimeout = errors.New("timeout. Cannot find element on page")

// SearchResult is a single search result entry, serialized to JSON with the
// field names given in the struct tags.
type SearchResult struct {
Rank int `json:"rank"`
URL string `json:"url"`
Title string `json:"title"`
Description string `json:"description"`
// NOTE(review): presumably true when the result is an advertisement — confirm against the engine code.
Ad bool `json:"ad"`
}

func ConvertSearchResultsMap(searchResultsMap map[string]SearchResult) *[]SearchResult {
Expand All @@ -39,6 +40,7 @@ type Query struct {
Filetype string // File extension to search.
Site string // Search site
Limit int // Limit the number of results
Answers bool // Include question and answers from SERP page to results with negative indexes
}

func (q Query) IsEmpty() bool {
Expand All @@ -48,23 +50,27 @@ func (q Query) IsEmpty() bool {
return false
}

func (q *Query) InitFromContext(c *fiber.Ctx) error {
q.Text = c.Query("text")
q.LangCode = c.Query("lang")
q.DateInterval = c.Query("date")
q.Filetype = c.Query("file")
q.Site = c.Query("site")
func (searchQuery *Query) InitFromContext(reqCtx *fiber.Ctx) error {
searchQuery.Text = reqCtx.Query("text")
searchQuery.LangCode = reqCtx.Query("lang")
searchQuery.DateInterval = reqCtx.Query("date")
searchQuery.Filetype = reqCtx.Query("file")
searchQuery.Site = reqCtx.Query("site")

limit, err := strconv.Atoi(c.Query("limit", "25"))
limit, err := strconv.Atoi(reqCtx.Query("limit", "25"))
if err != nil {
return err
}
q.Limit = limit
searchQuery.Limit = limit

if q.IsEmpty() {
return errors.New("Query cannot be empty")
searchQuery.Answers, err = strconv.ParseBool(reqCtx.Query("answers", "0"))
if err != nil {
return err
}

if searchQuery.IsEmpty() {
return errors.New("Query cannot be empty")
}
return nil
}

Expand All @@ -73,6 +79,7 @@ type SearchEngineOptions struct {
RateTime int64 `mapstructure:"rate_seconds"`
RateBurst int `mapstructure:"rate_burst"`
SelectorTimeout int64 `mapstructure:"selector_timeout"` // CSS selector timeout in seconds
IsSolveCaptcha bool `mapstructure:"captcha"`
}

func (o *SearchEngineOptions) Init() {
Expand Down
9 changes: 4 additions & 5 deletions core/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package core

import (
"context"
"errors"
"fmt"
"strings"

Expand Down Expand Up @@ -52,9 +51,9 @@ func NewServer(host string, port int, searchEngines ...SearchEngine) *Server {
if err != nil {
switch err {
case ErrCaptcha:
err = errors.New(fmt.Sprintf("Captcha found, please stop sending requests for a while\n%s", err))
err = fmt.Errorf("captcha found, please stop sending requests for a while\n%s", err)
case ErrSearchTimeout:
err = errors.New(fmt.Sprintf("%s", err))
err = fmt.Errorf("%s", err)
}

logrus.Errorf("Error during %s search: %s", locEngine.Name(), err)
Expand Down Expand Up @@ -87,9 +86,9 @@ func NewServer(host string, port int, searchEngines ...SearchEngine) *Server {
if err != nil {
switch err {
case ErrCaptcha:
err = errors.New(fmt.Sprintf("Captcha found, please stop sending requests for a while\n%s", err))
err = fmt.Errorf("captcha found, please stop sending requests for a while: %s", err)
case ErrSearchTimeout:
err = errors.New(fmt.Sprintf("%s", err))
err = fmt.Errorf("%s", err)
}

logrus.Errorf("Error during %s search: %s", locEngine.Name(), err)
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/karust/openserp
go 1.20

require (
github.com/2captcha/2captcha-go v1.1.4
github.com/PuerkitoBio/goquery v1.8.1
github.com/corpix/uarand v0.2.0
github.com/go-rod/rod v0.113.3
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RX
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/2captcha/2captcha-go v1.1.4 h1:Fm62VPvVhEHYQ8AI+/uquiTg41ml9f8ASjUkVuBvHcE=
github.com/2captcha/2captcha-go v1.1.4/go.mod h1:hYOq+KVOq/0zAG6OTYW7Y313qDkHv58CcaOyjdBQSco=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
Expand Down
Loading

0 comments on commit 0a3c07b

Please sign in to comment.