Skip to content
This repository has been archived by the owner on May 30, 2021. It is now read-only.

Commit

Permalink
Merge pull request #76 from sotetsuk/prepare-for-ver001-beta
Browse files Browse the repository at this point in the history
preparing for new release #45
  • Loading branch information
sotetsuk committed May 5, 2016
2 parents 1839d08 + 27c7a51 commit 63ab769
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 36 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[![Build Status](https://travis-ci.org/sotetsuk/go-scholar.svg?branch=master)](https://travis-ci.org/sotetsuk/go-scholar)
[![Coverage Status](https://coveralls.io/repos/github/sotetsuk/go-scholar/badge.svg?branch=master)](https://coveralls.io/github/sotetsuk/go-scholar?branch=master)
[![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)]()
[![GitHub version](https://badge.fury.io/gh/sotetsuk%2Fgo-scholar.svg)](https://badge.fury.io/gh/sotetsuk%2Fgo-scholar)

# go-scholar
**Go**ogle **Scholar** crawler and scraper written in **Go**
Expand Down
27 changes: 1 addition & 26 deletions article.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,32 +19,25 @@ const (
// Article holds the data collected for one search result. Every field is
// stored as a plain string, including the numeric-looking ones (Year,
// NumberOfCitations, NumberOfVersions).
type Article struct {
Title string
Year string
// Authors []string
URL string
// ClusterId is presumably the identifier accepted by the find/cite
// sub-commands — TODO confirm against the query builders.
ClusterId string
NumberOfCitations string
NumberOfVersions string
// InfoId: NOTE(review): looks like the per-result ID used for the cite
// pop-up — confirm against parseInfoId's caller.
InfoId string
PDFLink string
// PDFSource is assigned in parseSideBar from the side-bar link text via
// parsePDFSource (e.g. "arxiv.org").
PDFSource string
// Bibtex string
}

// NewArticle returns a pointer to a freshly allocated, zero-valued Article.
// Each call yields a distinct value.
func NewArticle() *Article {
	return &Article{}
}

func (a *Article) Parse(s *goquery.Selection, useBibTeX bool) {
func (a *Article) Parse(s *goquery.Selection) {
a.parseTitle(s)
a.parseHeader(s)
a.parseFooter(s)
a.parseSideBar(s)
/*
if useBibTeX {
a.crawlAndParseBibTeX()
}
*/
}

func (a *Article) parseTitle(s *goquery.Selection) {
Expand Down Expand Up @@ -93,24 +86,6 @@ func (a *Article) parseSideBar(s *goquery.Selection) {
a.PDFSource = parsePDFSource(sideBarA.Text())
}

/*
func (a *Article) crawlAndParseBibTeX() {
popURL, err := CitePopUpQuery(a.InfoId)
if err != nil {
}
popDoc, err := goquery.NewDocument(popURL)
if err != nil {
}
bibURL, _ := popDoc.Find("#gs_citi > a:first-child").Attr("href")
bibDoc, err := goquery.NewDocument(SCHOLAR_URL + bibURL)
if err != nil {
}
a.Bibtex = bibDoc.Text()
}
*/

func (a *Article) String() string {
title := fmt.Sprintf("title: %v\n", a.Title)
year := fmt.Sprintf("year: %v\n", a.Year)
Expand Down
4 changes: 2 additions & 2 deletions article_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func checkWithFirst(query func(map[string]interface{}) (string, error), args []s
}

a := NewArticle()
a.Parse(doc.Find(WHOLE_ARTICLE_SELECTOR).First(), false)
a.Parse(doc.Find(WHOLE_ARTICLE_SELECTOR).First())

// check
if !a.same(aExpected) {
Expand Down Expand Up @@ -135,7 +135,7 @@ func TestIsValid(t *testing.T) {

// parse
a := NewArticle()
a.Parse(doc.Find(WHOLE_ARTICLE_SELECTOR).First(), false)
a.Parse(doc.Find(WHOLE_ARTICLE_SELECTOR).First())

// check
expected := false
Expand Down
4 changes: 2 additions & 2 deletions articles.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ const (
WHOLE_ARTICLE_SELECTOR = ".gs_r"
)

func ParseArticles(ch chan *Article, doc *goquery.Document, useBibTeX bool) {
func ParseArticles(ch chan *Article, doc *goquery.Document) {
defer close(ch)

parse := func(i int, s *goquery.Selection) {
a := NewArticle()
a.Parse(s, useBibTeX)
a.Parse(s)

// Add this Article to Articles
if a.isValid() {
Expand Down
4 changes: 2 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func main() {
}

// parse and output
ch := make(chan *Article)
go ParseArticles(ch, doc, false)
ch := make(chan *Article, ARTICLES_BUFFER)
go ParseArticles(ch, doc)
StdoutArticleAsJson(ch) // TODO: treat --json|--bibtex parameters
}
6 changes: 3 additions & 3 deletions query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
"github.com/docopt/docopt-go"
)

// $ go-scholar search --query "deep learning" --author "y bengio" --after 2015 --num 100 --start 20
func TestSearchQuery(t *testing.T) {
// $ go-scholar search --query "deep learning" --author "y bengio" --after 2015 --num 100 --start 20
args := []string{"go-scholar", "search", "--query", "deep learning", "--author", "y bengio", "--after", "2015", "--num", "100", "--start", "20"}
expected := "https://scholar.google.co.jp/scholar?hl=en&q=deep+learning+author:\"y+bengio\"&as_ylo=2015&as_yhi=&num=100&start=20"

Expand All @@ -16,8 +16,8 @@ func TestSearchQuery(t *testing.T) {
}
}

// $ go-scholar find 8108748482885444188
func TestFindQuery(t *testing.T) {
// $ go-scholar find 8108748482885444188
args := []string{"go-scholar", "find","8108748482885444188"}
expected := "https://scholar.google.co.jp/scholar?hl=en&cluster=8108748482885444188&num=1"

Expand All @@ -26,8 +26,8 @@ func TestFindQuery(t *testing.T) {
}
}

// $ go-scholar cite 8108748482885444188 --after 2012 --num 40 --start 20
func TestCiteQuery(t *testing.T) {
// $ go-scholar cite 8108748482885444188 --after 2012 --num 40 --start 20
args := []string{"go-scholar", "cite", "8108748482885444188", "--after", "2012", "--num", "40", "--start", "20"}
expected := "https://scholar.google.co.jp/scholar?hl=en&cites=8108748482885444188&as_ylo=2012&as_yhi=&num=40&start=20"

Expand Down
2 changes: 1 addition & 1 deletion utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ func parseInfoId(url string) string {
return strings.TrimSpace(url)
}

// e.g., "[PDF] from arxiv.orgarxiv.org [PDF]" => "PDFSource": "arxiv.org"
func parsePDFSource(s string) string { // TODO: fix
// e.g., "[PDF] from arxiv.orgarxiv.org [PDF]" => "PDFSource": "arxiv.org"
prefix := "[PDF] from "
suffix := " [PDF]"
if strings.HasPrefix(s, prefix) && strings.HasSuffix(s, suffix) {
Expand Down

0 comments on commit 63ab769

Please sign in to comment.