Skip to content

Commit

Permalink
improve password detection and clean up codes
Browse files Browse the repository at this point in the history
  • Loading branch information
KJHJason committed May 15, 2024
1 parent fc8a602 commit 4e81cde
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 65 deletions.
109 changes: 54 additions & 55 deletions api/pixivfanbox/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package pixivfanbox

import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"net/http"
"path/filepath"

Expand All @@ -19,39 +21,60 @@ import (
// https://fanbox.pixiv.help/hc/en-us/articles/360011057793-What-types-of-attachments-can-I-post-
var pixivFanboxAllowedImageExt = []string{"jpg", "jpeg", "png", "gif"}

func detectUrlsAndPasswordsInPost(text, postFolderPath string, articleBlocks FanboxArticleBlocks, dlOptions *PixivFanboxDlOptions) ([]*httpfuncs.ToDownload, bool) {
loggedPassword := false
if api.DetectPasswordInText(text) {
func detectUrlsAndLogPasswordsInPost(blocks FanboxArticleBlocks, postFolderPath string, dlOptions *PixivFanboxDlOptions) []*httpfuncs.ToDownload {
var combinedText string
var gdriveLinks []*httpfuncs.ToDownload
for _, block := range blocks {
if block.Type == "image" { // image already processed in ImageMap
continue
}

// note: usually block.Type should be "p"
combinedText += block.Text + "\n"

linkUrlSlice := block.Links
if len(block.Links) == 0 {
continue
}
for _, linkUrlEl := range linkUrlSlice {
linkUrl := linkUrlEl.Url
api.DetectOtherExtDLLink(linkUrl, postFolderPath)
if api.DetectGDriveLinks(linkUrl, postFolderPath, true, dlOptions.Configs.LogUrls) && dlOptions.DlGdrive {
gdriveLinks = append(gdriveLinks, &httpfuncs.ToDownload{
Url: linkUrl,
FilePath: filepath.Join(postFolderPath, constants.GDRIVE_FOLDER),
})
continue
}
}
}

if api.DetectPasswordInText(combinedText) {
// Log the entire post text if it contains a password
filePath := filepath.Join(postFolderPath, constants.PASSWORD_FILENAME)
if !iofuncs.PathExists(filePath) {
loggedPassword = true
postBodyStr := "Found potential password in the post:\n\n"
for _, articleContent := range articleBlocks {
articleText := articleContent.Text
if articleText != "" {
postBodyStr += articleText + "\n"
}
}
logFileSize, err := iofuncs.GetFileSize(filePath)
doesNotExist := errors.Is(err, fs.ErrNotExist)
if !doesNotExist && err != nil { // unexpected OS error
err = fmt.Errorf(
"pixiv fanbox error %d: error getting file size of %q More info => %w",
cdlerrors.OS_ERROR,
filePath,
err,
)
logger.LogError(err, false, logger.ERROR)
return gdriveLinks
}

if logFileSize == 0 || doesNotExist { // checks if password file is empty or does not exist to avoid writing the same password multiple times
postBodyStr := "Found potential password in the post:\n\n" + combinedText
logger.LogMessageToPath(
postBodyStr,
filePath,
logger.ERROR,
logger.INFO,
)
}
}

var gdriveLinks []*httpfuncs.ToDownload
if dlOptions.Configs.LogUrls {
api.DetectOtherExtDLLink(text, postFolderPath)
}
if api.DetectGDriveLinks(text, postFolderPath, false, dlOptions.Configs.LogUrls) && dlOptions.DlGdrive {
gdriveLinks = append(gdriveLinks, &httpfuncs.ToDownload{
Url: text,
FilePath: filepath.Join(postFolderPath, constants.GDRIVE_FOLDER),
})
}
return gdriveLinks, loggedPassword
return gdriveLinks
}

func processFanboxArticlePost(postBody json.RawMessage, postFolderPath string, dlOptions *PixivFanboxDlOptions) ([]*httpfuncs.ToDownload, []*httpfuncs.ToDownload, error) {
Expand Down Expand Up @@ -90,36 +113,12 @@ func processFanboxArticlePost(postBody json.RawMessage, postFolderPath string, d
return urlsSlice, gdriveLinks, nil
}

loggedPassword := false
for _, articleBlock := range articleBlocks {
text := articleBlock.Text
if text != "" && !loggedPassword {
var detectedGdriveUrls []*httpfuncs.ToDownload
detectedGdriveUrls, loggedPassword = detectUrlsAndPasswordsInPost(
text,
postFolderPath,
articleBlocks,
dlOptions,
)
gdriveLinks = append(gdriveLinks, detectedGdriveUrls...)
}

articleLinks := articleBlock.Links
if len(articleLinks) > 0 {
for _, articleLink := range articleLinks {
linkUrl := articleLink.Url
api.DetectOtherExtDLLink(linkUrl, postFolderPath)
if api.DetectGDriveLinks(linkUrl, postFolderPath, true, dlOptions.Configs.LogUrls) && dlOptions.DlGdrive {
gdriveLinks = append(gdriveLinks, &httpfuncs.ToDownload{
Url: linkUrl,
FilePath: filepath.Join(postFolderPath, constants.GDRIVE_FOLDER),
})
continue
}
}
}
}

detectedGdriveUrls := detectUrlsAndLogPasswordsInPost(
articleBlocks,
postFolderPath,
dlOptions,
)
gdriveLinks = append(gdriveLinks, detectedGdriveUrls...)
return urlsSlice, gdriveLinks, nil
}

Expand Down
6 changes: 6 additions & 0 deletions api/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,12 @@ func DetectPasswordInText(text string) bool {
return true
}
}

for _, passwordRegex := range constants.PASSWORD_REGEXES {
if passwordRegex.MatchString(text) {
return true
}
}
return false
}

Expand Down
12 changes: 7 additions & 5 deletions constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,13 @@ var (
PAGE_NUM_REGEX = regexp.MustCompile(
fmt.Sprintf(`^%s$`, PAGE_NUM_REGEX_STR),
)
NUMBER_REGEX = regexp.MustCompile(`^\d+$`)
NUMBER_REGEX = regexp.MustCompile(`^\d+$`)
PASSWORD_TEXTS = []string{"パス", "Pass", "pass", "密码"}
PASSWORD_REGEXES = []*regexp.Regexp{
regexp.MustCompile(`ダウンロード(?:<\/span>)?<\/a><\/p><p>[\w-]+<\/p>`),
regexp.MustCompile(`ダウンロード\n([\w-]+)\n`),
}
EXTERNAL_DOWNLOAD_PLATFORMS = []string{"mega", "gigafile", "dropbox", "mediafire"}

// For GDrive
GDRIVE_URL_REGEX = regexp.MustCompile(
Expand Down Expand Up @@ -183,10 +189,6 @@ var (
"all",
}

// For Pixiv Fanbox
PASSWORD_TEXTS = []string{"パス", "Pass", "pass", "密码"}
EXTERNAL_DOWNLOAD_PLATFORMS = []string{"mega", "gigafile", "dropbox", "mediafire"}

// For Kemono
KEMONO_IMG_SRC_TAG_REGEX = regexp.MustCompile(`(?i)<img[^>]+src=(?:\\)?"(?P<imgSrc>[^">]+)(?:\\)?"[^>]*>`)
KEMONO_IMG_SRC_TAG_REGEX_IDX = KEMONO_IMG_SRC_TAG_REGEX.SubexpIndex("imgSrc")
Expand Down
8 changes: 5 additions & 3 deletions gdrive/gdrive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package gdrive
import (
"context"
"encoding/json"
"errors"
"io/fs"
"os"
"testing"

Expand Down Expand Up @@ -106,7 +108,7 @@ func TestGDriveFolderDownload(t *testing.T) {

func TestGDriveServiceAcc(t *testing.T) {
gdriveJsonPath := "../test-gdrive-service-acc.json"
if _, err := os.Stat(gdriveJsonPath); os.IsNotExist(err) {
if _, err := os.Stat(gdriveJsonPath); errors.Is(err, fs.ErrNotExist) {
t.Fatalf("gdrive-service-acc.json not found at %s", gdriveJsonPath)
}

Expand Down Expand Up @@ -153,7 +155,7 @@ func TestGDriveServiceAcc(t *testing.T) {

func getGDriveUserClientSecret(t *testing.T) (*oauth2.Config, []byte) {
gdriveJsonPath := "../test-gcp-client-secret.json"
if _, err := os.Stat(gdriveJsonPath); os.IsNotExist(err) {
if _, err := os.Stat(gdriveJsonPath); errors.Is(err, fs.ErrNotExist) {
t.Fatalf("test-gcp-client-secret.json not found at %s", gdriveJsonPath)
}

Expand Down Expand Up @@ -202,7 +204,7 @@ func TestGDriveOauthDownload(t *testing.T) {
_, credJson := getGDriveUserClientSecret(t)

tokenJsonPath := "../test-gcp-token.json"
if _, err := os.Stat(tokenJsonPath); os.IsNotExist(err) {
if _, err := os.Stat(tokenJsonPath); errors.Is(err, fs.ErrNotExist) {
t.Fatalf("test-gcp-token.json not found at %s", tokenJsonPath)
}

Expand Down
3 changes: 2 additions & 1 deletion httpfuncs/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -282,7 +283,7 @@ func downloadUrl(filePath string, queue chan struct{}, reqArgs *RequestArgs, ove

downloadedBytes, err := iofuncs.GetFileSize(filePath)
if err != nil {
if os.IsNotExist(err) {
if errors.Is(err, fs.ErrNotExist) {
// if the error wasn't because the file does not exist,
// then log the error and continue with the download process
logger.LogError(err, false, logger.ERROR)
Expand Down
4 changes: 3 additions & 1 deletion iofuncs/iofuncs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package iofuncs

import (
"bufio"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
Expand All @@ -11,7 +13,7 @@ import (
// PathExists reports whether a file or directory is present at filepath.
//
// It returns false only when os.Stat fails with an error that matches
// fs.ErrNotExist. Any other Stat failure (for example a permission error)
// is still reported as true, because the entry cannot be proven absent.
func PathExists(filepath string) bool {
	_, err := os.Stat(filepath)
	// errors.Is also matches wrapped errors, unlike the older os.IsNotExist
	// helper, so this single check replaces the former duplicate return.
	return !errors.Is(err, fs.ErrNotExist)
}

// similar to PathExists but checks if the path exists and is a directory
Expand Down

0 comments on commit 4e81cde

Please sign in to comment.