Skip to content

Commit

Permalink
FEAT: rebuild legacy rank and store
Browse files Browse the repository at this point in the history
  • Loading branch information
kynrai committed Jun 11, 2024
1 parent eaf9a31 commit 29ffe67
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 157 deletions.
4 changes: 4 additions & 0 deletions checks/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package checks
import (
"net/http"
"time"

"github.com/xray-web/web-check-api/checks/store/legacyrank"
)

type Checks struct {
Carbon *Carbon
LegacyRank *LegacyRank
Rank *Rank
SocialTags *SocialTags
Tls *Tls
Expand All @@ -18,6 +21,7 @@ func NewChecks() *Checks {
}
return &Checks{
Carbon: NewCarbon(client),
LegacyRank: NewLegacyRank(legacyrank.NewInMemoryStore()),

Check warning on line 24 in checks/checks.go

View check run for this annotation

Codecov / codecov/patch

checks/checks.go#L24

Added line #L24 was not covered by tests
Rank: NewRank(client),
SocialTags: NewSocialTags(client),
Tls: NewTls(client),
Expand Down
27 changes: 27 additions & 0 deletions checks/legacy_rank.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package checks

import "github.com/xray-web/web-check-api/checks/store/legacyrank"

// DomainRank is the result of a legacy rank lookup: the domain that was
// queried together with the rank the backing store returned for it.
type DomainRank struct {
// Domain is the domain string that was passed to the lookup.
Domain string `json:"domain"`
// Rank is the integer rank returned by the store for Domain.
Rank int `json:"rank"`
}

// LegacyRank answers rank lookups by delegating to a legacyrank.Getter.
type LegacyRank struct {
// data is the Getter consulted for every lookup.
data legacyrank.Getter
}

// NewLegacyRank builds a LegacyRank check that resolves ranks through lrg.
func NewLegacyRank(lrg legacyrank.Getter) *LegacyRank {
	check := new(LegacyRank)
	check.data = lrg
	return check
}

func (lr *LegacyRank) LegacyRank(domain string) (*DomainRank, error) {
rank, err := lr.data.GetLegacyRank(domain)
if err != nil {
return nil, err

Check warning on line 21 in checks/legacy_rank.go

View check run for this annotation

Codecov / codecov/patch

checks/legacy_rank.go#L21

Added line #L21 was not covered by tests
}
return &DomainRank{
Domain: domain,
Rank: rank,
}, nil
}
23 changes: 23 additions & 0 deletions checks/legacy_rank_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package checks

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/xray-web/web-check-api/checks/store/legacyrank"
)

func TestLegacyRank(t *testing.T) {
t.Parallel()

t.Run("get rank", func(t *testing.T) {
t.Parallel()
lr := NewLegacyRank(legacyrank.GetterFunc(func(domain string) (int, error) {
return 1, nil
}))
dr, err := lr.LegacyRank("example.com")
assert.NoError(t, err)
assert.Equal(t, 1, dr.Rank)
assert.Equal(t, "example.com", dr.Domain)
})
}
99 changes: 99 additions & 0 deletions checks/store/legacyrank/legacy_rank.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package legacyrank

import (
"archive/zip"
"bytes"
"context"
"encoding/csv"
"errors"
"io"
"log"
"net/http"
"strconv"
"sync"
"time"
)

// ErrNotFound is returned by InMemoryStore.GetLegacyRank when the requested
// domain has no entry in the loaded rank data.
var ErrNotFound = errors.New("domain not found")

// Getter is the lookup interface for legacy ranks: given a domain it returns
// that domain's rank, or an error.
type Getter interface {
// GetLegacyRank returns the rank recorded for domain.
GetLegacyRank(domain string) (int, error)
}

// GetterFunc adapts an ordinary function to the Getter interface, so a
// closure can be used wherever a Getter is expected (for example as a test
// stub).
type GetterFunc func(domain string) (int, error)

// GetLegacyRank calls f(domain) and returns its results unchanged.
func (f GetterFunc) GetLegacyRank(domain string) (int, error) {
	return f(domain)
}

type InMemoryStore struct{}

var once sync.Once
var data map[string]int //map of domain to rank

func NewInMemoryStore() *InMemoryStore {
return &InMemoryStore{}
}

func (s *InMemoryStore) GetLegacyRank(url string) (int, error) {
once.Do(func() {
var err error
data, err = load()
if err != nil {
log.Println(err)

Check warning on line 43 in checks/store/legacyrank/legacy_rank.go

View check run for this annotation

Codecov / codecov/patch

checks/store/legacyrank/legacy_rank.go#L43

Added line #L43 was not covered by tests
}
})

rank, ok := data[url]
if !ok {
return -1, ErrNotFound

Check warning on line 49 in checks/store/legacyrank/legacy_rank.go

View check run for this annotation

Codecov / codecov/patch

checks/store/legacyrank/legacy_rank.go#L49

Added line #L49 was not covered by tests
}
return rank, nil
}

// load downloads the top-1m.csv.zip archive, extracts top-1m.csv from it in
// memory and parses it into a map of domain to rank.
//
// Each CSV record is expected to be "rank,domain": the first field is parsed
// as an integer rank and the second is used as the map key. The whole
// operation is bounded by a 10 second timeout.
//
// It returns an error if the request fails, the server answers with anything
// other than 200 OK, the archive is larger than the size limit or cannot be
// read, or any record is malformed.
func load() (map[string]int, error) {
	const (
		archiveURL = "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
		csvName    = "top-1m.csv"
		// Upper bound on the bytes buffered in memory, so a misbehaving
		// endpoint cannot exhaust memory.
		maxArchiveBytes = 64 << 20
	)

	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
	defer cancel()
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, archiveURL, nil)
	if err != nil {
		return nil, err
	}
	client := &http.Client{
		Timeout: time.Second * 10,
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// A non-200 answer (error page, redirect stub, ...) is not a zip archive;
	// report the status instead of a confusing zip parse failure.
	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("legacyrank: unexpected response status " + resp.Status)
	}

	// Read one byte past the limit so an oversized body can be told apart
	// from one that is exactly at the limit.
	b, err := io.ReadAll(io.LimitReader(resp.Body, maxArchiveBytes+1))
	if err != nil {
		return nil, err
	}
	if len(b) > maxArchiveBytes {
		return nil, errors.New("legacyrank: rank archive exceeds size limit")
	}

	zf, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))
	if err != nil {
		return nil, err
	}
	f, err := zf.Open(csvName)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	r := csv.NewReader(f)
	ranks := make(map[string]int)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		// Guard the indexing below: a record with fewer than two fields
		// would otherwise panic.
		if len(record) < 2 {
			return nil, errors.New("legacyrank: malformed record in " + csvName)
		}
		rank, err := strconv.Atoi(record[0])
		if err != nil {
			return nil, err
		}
		ranks[record[1]] = rank
	}
	return ranks, nil
}
26 changes: 26 additions & 0 deletions checks/store/legacyrank/legacy_rank_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package legacyrank_test

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/xray-web/web-check-api/checks/store/legacyrank"
)

// TestInMemoryStore checks that well-known domains can be looked up through
// the in-memory store without error.
func TestInMemoryStore(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name   string
		domain string
	}{
		{name: "get google rank", domain: "google.com"},
		{name: "get microsoft rank", domain: "microsoft.com"},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for the parallel subtest closure
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			store := legacyrank.NewInMemoryStore()
			rank, err := store.GetLegacyRank(tc.domain)
			assert.NoError(t, err, rank)
		})
	}
}
146 changes: 3 additions & 143 deletions handlers/legacy_rank.go
Original file line number Diff line number Diff line change
@@ -1,160 +1,20 @@
package handlers

import (
"archive/zip"
"encoding/csv"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
)

const (
fileURL = "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
tempFilePath = "/tmp/top-1m.csv"
"github.com/xray-web/web-check-api/checks"
)

// RankResponse is the result produced by checkLegacyRank for one domain.
type RankResponse struct {
// Domain is the domain extracted from the requested URL.
Domain string `json:"domain"`
// Rank is the first CSV field of the matching record, kept as a string;
// it is left empty when the domain was not found.
Rank string `json:"rank"`
// IsFound is true when a CSV record matched Domain, false otherwise.
IsFound bool `json:"isFound"`
}

// checkLegacyRank reports the rank of the domain named by urlStr.
//
// The domain is taken from the URL's host (without any port). For scheme-less
// input such as "example.com/path", which parses with an empty host, the first
// path segment is used instead. If no domain can be extracted an error is
// returned.
//
// The rank list is read from tempFilePath; when that file does not exist it is
// first fetched with downloadAndUnzip(fileURL). The CSV is scanned record by
// record for a second field equal to the domain. A match yields a response
// with IsFound set and Rank taken from the first field; reaching the end of
// the file yields a response with IsFound false and a nil error.
func checkLegacyRank(urlStr string) (RankResponse, error) {
	// Parse the URL to extract the domain
	u, err := url.Parse(urlStr)
	if err != nil {
		return RankResponse{}, fmt.Errorf("invalid URL")
	}

	// Hostname drops a ":port" suffix, which could never match a CSV entry.
	domain := u.Hostname()
	if domain == "" {
		// If the host is empty, try to extract the domain from the Path
		domain = strings.Split(u.Path, "/")[0]
	}
	if domain == "" {
		return RankResponse{}, fmt.Errorf("unable to extract domain from URL")
	}

	// Download and unzip the file if not in cache
	if _, err := os.Stat(tempFilePath); os.IsNotExist(err) {
		if err := downloadAndUnzip(fileURL); err != nil {
			return RankResponse{}, err
		}
	}

	// Parse the CSV and find the rank
	file, err := os.Open(tempFilePath)
	if err != nil {
		return RankResponse{}, fmt.Errorf("error opening CSV file: %w", err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return RankResponse{}, fmt.Errorf("error reading CSV record: %w", err)
		}

		// Skip records too short to hold "rank,domain" rather than panic
		// on the index below.
		if len(record) < 2 {
			continue
		}

		if record[1] == domain {
			return RankResponse{
				Domain:  domain,
				Rank:    record[0],
				IsFound: true,
			}, nil
		}
	}

	return RankResponse{
		Domain:  domain,
		IsFound: false,
	}, nil
}

// downloadAndUnzip fetches the zip archive at url, stores it at
// tempFilePath+".zip" and extracts its contents into /tmp.
//
// It returns an error if the download fails or the server answers with a
// status other than 200 OK, if the archive cannot be written to disk, or if
// extraction fails.
//
// NOTE(review): http.Get uses the default client, which has no timeout.
func downloadAndUnzip(url string) error {
	resp, err := http.Get(url)
	if err != nil {
		return fmt.Errorf("error downloading file: %s", err)
	}
	defer resp.Body.Close()

	// Do not persist an error page as if it were the archive.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("error downloading file: unexpected status %s", resp.Status)
	}

	zipPath := tempFilePath + ".zip"
	zipFile, err := os.Create(zipPath)
	if err != nil {
		return fmt.Errorf("error creating zip file: %s", err)
	}

	// Close before extraction so the archive is complete on disk when unzip
	// opens it, and so a failed close is reported instead of ignored.
	_, copyErr := io.Copy(zipFile, resp.Body)
	closeErr := zipFile.Close()
	if copyErr != nil {
		return fmt.Errorf("error writing zip file: %s", copyErr)
	}
	if closeErr != nil {
		return fmt.Errorf("error writing zip file: %s", closeErr)
	}

	if err := unzip(zipPath, "/tmp"); err != nil {
		return fmt.Errorf("error unzipping file: %s", err)
	}

	return nil
}

// unzip extracts every entry of the zip archive at src into the directory
// dest, creating intermediate directories as needed.
//
// Entries whose name would resolve outside dest (for example "../x") are
// rejected with an error and nothing is written for them. Extraction stops at
// the first error.
func unzip(src, dest string) error {
	r, err := zip.OpenReader(src)
	if err != nil {
		return err
	}
	defer r.Close()

	for _, f := range r.File {
		if err := extractZipEntry(f, dest); err != nil {
			return err
		}
	}

	return nil
}

// extractZipEntry writes a single archive entry below dest. Keeping this in
// its own function lets the deferred Close run per entry instead of piling up
// until the whole archive has been processed.
func extractZipEntry(f *zip.File, dest string) error {
	path := filepath.Join(dest, f.Name)

	// Refuse names that climb out of dest ("zip slip").
	rel, err := filepath.Rel(dest, path)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("illegal file path in archive: %s", f.Name)
	}

	if f.FileInfo().IsDir() {
		return os.MkdirAll(path, f.Mode())
	}

	if err := os.MkdirAll(filepath.Dir(path), os.ModePerm); err != nil {
		return err
	}

	rc, err := f.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	out, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, rc); err != nil {
		out.Close()
		return err
	}
	// A failed Close can mean the data never reached disk; surface it.
	return out.Close()
}

func HandleLegacyRank() http.Handler {
func HandleLegacyRank(l *checks.LegacyRank) http.Handler {

Check warning on line 9 in handlers/legacy_rank.go

View check run for this annotation

Codecov / codecov/patch

handlers/legacy_rank.go#L9

Added line #L9 was not covered by tests
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
rawURL, err := extractURL(r)
if err != nil {
JSONError(w, ErrMissingURLParameter, http.StatusBadRequest)
return
}

result, err := checkLegacyRank(rawURL.String())
result, err := l.LegacyRank(rawURL.Hostname())

Check warning on line 17 in handlers/legacy_rank.go

View check run for this annotation

Codecov / codecov/patch

handlers/legacy_rank.go#L17

Added line #L17 was not covered by tests
if err != nil {
JSONError(w, err, http.StatusInternalServerError)
return
Expand Down
Loading

0 comments on commit 29ffe67

Please sign in to comment.