diff --git a/extract.go b/extract.go index cf2d4bf..4150c89 100644 --- a/extract.go +++ b/extract.go @@ -30,6 +30,14 @@ func getDownloadLinks(inputURL string, channelID string, interactive bool) map[s return skipDuplicateLinks(links, channelID, interactive) } } + if RegexpUrlTistoryLegacy.MatchString(inputURL) { + links, err := getLegacyTistoryUrls(inputURL) + if err != nil { + fmt.Println("legacy tistory URL failed,", inputURL, ",", err) + } else if len(links) > 0 { + return skipDuplicateLinks(links, channelID, interactive) + } + } if RegexpUrlGfycat.MatchString(inputURL) { links, err := getGfycatUrls(inputURL) if err != nil { diff --git a/main.go b/main.go index 67766d7..39eda6b 100644 --- a/main.go +++ b/main.go @@ -669,23 +669,6 @@ func getTwitterStatusUrls(url string, channelID string) (map[string]string, erro return links, nil } -func getTistoryUrls(url string) (map[string]string, error) { - url = strings.Replace(url, "/image/", "/original/", -1) - return map[string]string{url: ""}, nil -} - -func getTistoryWithCDNUrls(urlI string) (map[string]string, error) { - parameters, _ := url.ParseQuery(urlI) - if val, ok := parameters["fname"]; ok { - if len(val) > 0 { - if RegexpUrlTistory.MatchString(val[0]) { - return getTistoryUrls(val[0]) - } - } - } - return nil, nil -} - func getGfycatUrls(url string) (map[string]string, error) { parts := strings.Split(url, "/") if len(parts) < 3 { @@ -1051,8 +1034,8 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) { doc.Find(".article img, #content img, div[role=main] img, .section_blogview img").Each(func(i int, s *goquery.Selection) { foundUrl, exists := s.Attr("src") if exists { - isTistoryCdnUrl := RegexpUrlTistoryWithCDN.MatchString(foundUrl) - isTistoryUrl := RegexpUrlTistory.MatchString(foundUrl) + isTistoryCdnUrl := RegexpUrlTistoryLegacyWithCDN.MatchString(foundUrl) + isTistoryUrl := RegexpUrlTistoryLegacy.MatchString(foundUrl) if isTistoryCdnUrl == true { finalTistoryUrls, _ := getTistoryWithCDNUrls(foundUrl) if len(finalTistoryUrls) > 0 { @@ -1062,7 +1045,7 @@ func getPossibleTistorySiteUrls(url string) (map[string]string, error) { } } } else if isTistoryUrl == true { - finalTistoryUrls, _ := getTistoryUrls(foundUrl) + finalTistoryUrls, _ := getLegacyTistoryUrls(foundUrl) if len(finalTistoryUrls) > 0 { for finalTistoryUrl := range finalTistoryUrls { foundFilename := s.AttrOr("filename", "") diff --git a/main_test.go b/main_test.go index 4fd65f1..fc984dd 100644 --- a/main_test.go +++ b/main_test.go @@ -12,8 +12,8 @@ import ( func init() { RegexpUrlTwitter, _ = regexp.Compile(REGEXP_URL_TWITTER) - RegexpUrlTistory, _ = regexp.Compile(REGEXP_URL_TISTORY) - RegexpUrlTistoryWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN) + RegexpUrlTistoryLegacy, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY) + RegexpUrlTistoryLegacyWithCDN, _ = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN) RegexpUrlGfycat, _ = regexp.Compile(REGEXP_URL_GFYCAT) RegexpUrlInstagram, _ = regexp.Compile(REGEXP_URL_INSTAGRAM) RegexpUrlImgurSingle, _ = regexp.Compile(REGEXP_URL_IMGUR_SINGLE) @@ -77,7 +77,7 @@ var getTistoryUrlsTests = []urlsTestpair{ func TestGetTistoryUrls(t *testing.T) { for _, pair := range getTistoryUrlsTests { - v, err := getTistoryUrls(pair.value) + v, err := getLegacyTistoryUrls(pair.value) if err != nil { t.Errorf("For %v, expected %v, got %v", pair.value, nil, err) } diff --git a/regex.go b/regex.go index a760ae5..80ba928 100644 --- a/regex.go +++ b/regex.go @@ -4,22 +4,44 @@ import ( "regexp" ) +const ( + REGEXP_URL_TWITTER = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$` + REGEXP_URL_TWITTER_STATUS = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$` + REGEXP_URL_TISTORY = `^http(s?):\/\/t[0-9]+\.daumcdn\.net\/cfile\/tistory\/([A-Z0-9]+?)(\?original)?$` + REGEXP_URL_TISTORY_LEGACY = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$` + REGEXP_URL_TISTORY_LEGACY_WITH_CDN = `^http(s)?:\/\/[0-9a-z]+.daumcdn.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$` + REGEXP_URL_GFYCAT = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$` + REGEXP_URL_INSTAGRAM = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$` + REGEXP_URL_IMGUR_SINGLE = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$` + REGEXP_URL_IMGUR_ALBUM = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$` + REGEXP_URL_GOOGLEDRIVE = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$` + REGEXP_URL_GOOGLEDRIVE_FOLDER = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$` + REGEXP_URL_POSSIBLE_TISTORY_SITE = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$` + REGEXP_URL_FLICKR_PHOTO = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$` + REGEXP_URL_FLICKR_ALBUM = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$` + REGEXP_URL_FLICKR_ALBUM_SHORT = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$` + REGEXP_URL_STREAMABLE = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$` + + REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$` +) + var ( - RegexpUrlTwitter *regexp.Regexp - RegexpUrlTwitterStatus *regexp.Regexp - RegexpUrlTistory *regexp.Regexp - RegexpUrlTistoryWithCDN *regexp.Regexp - RegexpUrlGfycat *regexp.Regexp - RegexpUrlInstagram *regexp.Regexp - RegexpUrlImgurSingle *regexp.Regexp - RegexpUrlImgurAlbum *regexp.Regexp - RegexpUrlGoogleDrive *regexp.Regexp - RegexpUrlGoogleDriveFolder *regexp.Regexp - RegexpUrlPossibleTistorySite *regexp.Regexp - RegexpUrlFlickrPhoto *regexp.Regexp - RegexpUrlFlickrAlbum *regexp.Regexp - RegexpUrlFlickrAlbumShort *regexp.Regexp - RegexpUrlStreamable *regexp.Regexp + RegexpUrlTwitter *regexp.Regexp + RegexpUrlTwitterStatus *regexp.Regexp + RegexpUrlTistory *regexp.Regexp + RegexpUrlTistoryLegacy *regexp.Regexp + RegexpUrlTistoryLegacyWithCDN *regexp.Regexp + RegexpUrlGfycat *regexp.Regexp + RegexpUrlInstagram *regexp.Regexp + RegexpUrlImgurSingle *regexp.Regexp + RegexpUrlImgurAlbum *regexp.Regexp + RegexpUrlGoogleDrive *regexp.Regexp + RegexpUrlGoogleDriveFolder *regexp.Regexp + RegexpUrlPossibleTistorySite *regexp.Regexp + RegexpUrlFlickrPhoto *regexp.Regexp + RegexpUrlFlickrAlbum *regexp.Regexp + RegexpUrlFlickrAlbumShort *regexp.Regexp + RegexpUrlStreamable *regexp.Regexp ) func initRegex() error { @@ -36,7 +58,11 @@ func initRegex() error { if err != nil { return err } - RegexpUrlTistoryWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_WITH_CDN) + RegexpUrlTistoryLegacy, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY) + if err != nil { + return err + } + RegexpUrlTistoryLegacyWithCDN, err = regexp.Compile(REGEXP_URL_TISTORY_LEGACY_WITH_CDN) if err != nil { return err } diff --git a/tistory.go b/tistory.go new file mode 100644 index 0000000..e289c26 --- /dev/null +++ b/tistory.go @@ -0,0 +1,33 @@ +package main + +import ( + "net/url" + "strings" +) + +// getTistoryUrls downloads tistory URLs +// http://t1.daumcdn.net/cfile/tistory/[…] => http://t1.daumcdn.net/cfile/tistory/[…] +// http://t1.daumcdn.net/cfile/tistory/[…]?original => as is +func getTistoryUrls(link string) (map[string]string, error) { + if !strings.HasSuffix(link, "?original") { + link += "?original" + } + return map[string]string{link: ""}, nil +} + +func getLegacyTistoryUrls(link string) (map[string]string, error) { + link = strings.Replace(link, "/image/", "/original/", -1) + return map[string]string{link: ""}, nil +} + +func getTistoryWithCDNUrls(urlI string) (map[string]string, error) { + parameters, _ := url.ParseQuery(urlI) + if val, ok := parameters["fname"]; ok { + if len(val) > 0 { + if RegexpUrlTistoryLegacy.MatchString(val[0]) { + return getLegacyTistoryUrls(val[0]) + } + } + } + return nil, nil +} diff --git a/vars.go b/vars.go index 21111bf..0066f79 100644 --- a/vars.go +++ b/vars.go @@ -1,28 +1,11 @@ package main const ( - VERSION = "1.32" - DATABASE_DIR = "database" - RELEASE_URL = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest" - RELEASE_API_URL = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest" - IMGUR_CLIENT_ID = "a39473314df3f59" - REGEXP_URL_TWITTER = `^http(s?):\/\/pbs(-[0-9]+)?\.twimg\.com\/media\/[^\./]+\.(jpg|png)((\:[a-z]+)?)$` - REGEXP_URL_TWITTER_STATUS = `^http(s?):\/\/(www\.)?twitter\.com\/([A-Za-z0-9-_\.]+\/status\/|statuses\/|i\/web\/status\/)([0-9]+)$` - REGEXP_URL_TISTORY = `^http(s?):\/\/[a-z0-9]+\.uf\.tistory\.com\/(image|original)\/[A-Z0-9]+$` - REGEXP_URL_TISTORY_WITH_CDN = `^http(s)?:\/\/[0-9a-z]+.daumcdn.net\/[a-z]+\/[a-zA-Z0-9\.]+\/\?scode=mtistory&fname=http(s?)%3A%2F%2F[a-z0-9]+\.uf\.tistory\.com%2F(image|original)%2F[A-Z0-9]+$` - REGEXP_URL_GFYCAT = `^http(s?):\/\/gfycat\.com\/(gifs\/detail\/)?[A-Za-z]+$` - REGEXP_URL_INSTAGRAM = `^http(s?):\/\/(www\.)?instagram\.com\/p\/[^/]+\/(\?[^/]+)?$` - REGEXP_URL_IMGUR_SINGLE = `^http(s?):\/\/(i\.)?imgur\.com\/[A-Za-z0-9]+(\.gifv)?$` - REGEXP_URL_IMGUR_ALBUM = `^http(s?):\/\/imgur\.com\/(a\/|gallery\/|r\/[^\/]+\/)[A-Za-z0-9]+(#[A-Za-z0-9]+)?$` - REGEXP_URL_GOOGLEDRIVE = `^http(s?):\/\/drive\.google\.com\/file\/d\/[^/]+\/view$` - REGEXP_URL_GOOGLEDRIVE_FOLDER = `^http(s?):\/\/drive\.google\.com\/(drive\/folders\/|open\?id=)([^/]+)$` - REGEXP_URL_POSSIBLE_TISTORY_SITE = `^http(s)?:\/\/[0-9a-zA-Z\.-]+\/(m\/)?(photo\/)?[0-9]+$` - REGEXP_URL_FLICKR_PHOTO = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/([0-9]+)@([A-Z0-9]+)\/([0-9]+)(\/)?(\/in\/album-([0-9]+)(\/)?)?$` - REGEXP_URL_FLICKR_ALBUM = `^http(s)?:\/\/(www\.)?flickr\.com\/photos\/(([0-9]+)@([A-Z0-9]+)|[A-Za-z0-9]+)\/(albums\/(with\/)?|(sets\/)?)([0-9]+)(\/)?$` - REGEXP_URL_FLICKR_ALBUM_SHORT = `^http(s)?:\/\/((www\.)?flickr\.com\/gp\/[0-9]+@[A-Z0-9]+\/[A-Za-z0-9]+|flic\.kr\/s\/[a-zA-Z0-9]+)$` - REGEXP_URL_STREAMABLE = `^http(s?):\/\/(www\.)?streamable\.com\/([0-9a-z]+)$` - - REGEXP_FILENAME = `^^[^/\\:*?"<>|]{1,150}\.[A-Za-z0-9]{2,4}$$` + VERSION = "1.33" + DATABASE_DIR = "database" + RELEASE_URL = "https://github.com/Seklfreak/discord-image-downloader-go/releases/latest" + RELEASE_API_URL = "https://api.github.com/repos/Seklfreak/discord-image-downloader-go/releases/latest" + IMGUR_CLIENT_ID = "a39473314df3f59" DEFAULT_CONFIG_FILE = "config.ini"