Skip to content
This repository has been archived by the owner on May 30, 2021. It is now read-only.

Commit

Permalink
Merge pull request #102 from sotetsuk/develop-v0.2.0
Browse files Browse the repository at this point in the history
release v0.2.0
  • Loading branch information
sotetsuk committed Jun 18, 2016
2 parents 61c85e3 + 1e8973e commit e1573ef
Show file tree
Hide file tree
Showing 26 changed files with 401 additions and 113 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.idea/*
*.iml
goscholar
goscholar*
Godeps/_workspace/src/*
4 changes: 1 addition & 3 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ before_install:
- if ! go get code.google.com/p/go.tools/cmd/cover; then go get golang.org/x/tools/cmd/cover; fi

install:
- go get github.com/docopt/docopt-go
- go get github.com/PuerkitoBio/goquery
- go get github.com/Sirupsen/logrus
- ./build

script:
- $HOME/gopath/bin/goveralls -repotoken $COVERALLS_TOKEN
Expand Down
51 changes: 51 additions & 0 deletions Godeps/Godeps.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Godeps/Readme

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Godeps/_workspace/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

159 changes: 103 additions & 56 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ $ goscholar/build
Options:

```
--fmt: apply go fmt to all files
--dev: apply go fmt to all files and save dependencies
```

After the ```build``` command has been executed, you will find cross-compiled binary files in the ```bin``` directory.

## Feature

- API for Go
Expand All @@ -43,7 +45,7 @@ Options:
- find by ```<cluster-id>```
- search the articles citing ```<cluster-id>```
- JSON output
- recursive clawling is not implemented
- recursive crawling is not implemented

## Go API

Expand Down Expand Up @@ -75,68 +77,110 @@ for a := range ch {
### Example

```sh
$ goscholar search --keywords "deep learning nature" --author "y bengio" --after 2015 --num 1 | python -mjson.tool
$ goscholar search --keywords "deep learning nature" --author "y bengio" --after 2015 --num 1 | jq .
[
{
"cluster_id": "5362332738201102290",
"info_id_": "0qfs6zbVakoJ",
"link": {
"format": "PDF",
"name": "psu.edu",
"url": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.894&rep=rep1&type=pdf"
},
"num_cite_": "390",
"num_ver": "7",
"title": {
"name": "Deep learning",
"url": "http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html"
},
"year": "2015"
}
]
{
"title": {
"name": "Deep learning",
"url": "http://www.nature.com/nature/journal/v521/n7553/abs/nature14539.html"
},
"year": "2015",
"cluster_id": "5362332738201102290",
"num_cite": "499",
"num_ver": "7",
"info_id": "0qfs6zbVakoJ",
"link": {
"name": "psu.edu",
"url": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.894&rep=rep1&type=pdf",
"format": "PDF"
},
"bibtex": "@article{lecun2015deep, title={Deep learning}, author={LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey}, journal={Nature}, volume={521}, number={7553}, pages={436--444}, year={2015}, publisher={Nature Publishing Group}}",
"author": [
"LeCun, Yann",
"Bengio, Yoshua",
"Hinton, Geoffrey"
],
"journal": "Nature",
"booktitle": "",
"volume": "521",
"number": "7553",
"pages": "436--444",
"publisher": "Nature Publishing Group"
}
]
```

```sh
$ goscholar find 15502119379559163003 | python -mjson.tool
$ goscholar find 15502119379559163003 | jq .
[
{
"cluster_id": "15502119379559163003",
"info_id": "e6RSJHGXItcJ",
"link": {
"format": "PDF",
"name": "wustl.edu",
"url": "http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf"
},
"num_cite": "260",
"num_ver": "",
"title": {
"name": "Deep learning via Hessian-free optimization",
"url": "http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf"
},
"year": "2010"
}
]
{
"title": {
"name": "Deep learning via Hessian-free optimization",
"url": "http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf"
},
"year": "2010",
"cluster_id": "15502119379559163003",
"num_cite": "269",
"num_ver": "",
"info_id": "e6RSJHGXItcJ",
"link": {
"name": "wustl.edu",
"url": "http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf",
"format": "PDF"
},
"bibtex": "@inproceedings{martens2010deep, title={Deep learning via Hessian-free optimization}, author={Martens, James}, booktitle={Proceedings of the 27th International Conference on Machine Learning (ICML-10)}, pages={735--742}, year={2010}}",
"author": [
"Martens, James"
],
"journal": "",
"booktitle": "Proceedings of the 27th International Conference on Machine Learning (ICML-10)",
"volume": "",
"number": "",
"pages": "735--742",
"publisher": ""
}
]
```

```sh
$ goscholar cite 15502119379559163003 --num 1 | python -mjson.tool
[
{
"cluster_id": "3674494786452480182",
"info_id": "tmCGO4pt_jIJ",
"link": {
"format": "PDF",
"name": "toronto.edu",
"url": "http://www.cs.toronto.edu/~asamir/papers/SPM_DNN_12.pdf"
},
"num_cite": "1452",
"num_ver": "27",
"title": {
"name": "Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups",
"url": "http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6296526"
},
"year": "2012"
}
{
"title": {
"name": "Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups",
"url": "http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6296526"
},
"year": "2012",
"cluster_id": "3674494786452480182",
"num_cite": "1559",
"num_ver": "27",
"info_id": "tmCGO4pt_jIJ",
"link": {
"name": "toronto.edu",
"url": "http://www.cs.toronto.edu/~asamir/papers/SPM_DNN_12.pdf",
"format": "PDF"
},
"bibtex": "@article{hinton2012deep, title={Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups}, author={Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N and others}, journal={Signal Processing Magazine, IEEE}, volume={29}, number={6}, pages={82--97}, year={2012}, publisher={IEEE}}",
"author": [
"Hinton, Geoffrey",
"Deng, Li",
"Yu, Dong",
"Dahl, George E",
"Mohamed, Abdel-rahman",
"Jaitly, Navdeep",
"Senior, Andrew",
"Vanhoucke, Vincent",
"Nguyen, Patrick",
"Sainath, Tara N",
"others"
],
"journal": "Signal Processing Magazine, IEEE",
"booktitle": "",
"volume": "29",
"number": "6",
"pages": "82--97",
"publisher": "IEEE"
}
]
```

Expand All @@ -149,9 +193,11 @@ goscholar: Google Scholar crawler and scraper written in Go
Usage:
goscholar search [--keywords=<keywords>] [--author=<author>] [--title=<title>]
[--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>]
goscholar find <cluster-id>
[--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>]
[--user-agent=<user-agent>]
goscholar find <cluster-id> [--user-agent=<user-agent>]
goscholar cite <cluster-id> [--after=<year>] [--before=<year>] [--num=<num>] [--start=<start>]
[--user-agent=<user-agent>]
goscholar -h | --help
goscholar --version
Expand All @@ -177,6 +223,7 @@ Others:
- [github.com/docopt/docopt-go](https://github.com/docopt/docopt-go)
- [github.com/PuerkitoBio/goquery](https://github.com/PuerkitoBio/goquery)
- [github.com/Sirupsen/logrus](https://github.com/Sirupsen/logrus)
- [github.com/sotetsuk/gobibtex](https://github.com/sotetsuk/gobibtex)

## Related Work
goscholar is inspired by [scholar.py](https://github.com/ckreibich/scholar.py)
Expand Down
39 changes: 17 additions & 22 deletions article.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,29 @@ package goscholar

import (
"encoding/json"
"fmt"
log "github.com/Sirupsen/logrus"
"github.com/k0kubun/pp"
"strconv"
"strings"
)

// Article stores the parsed results from Google Scholar.
type Article struct {
Title *Title `json:"title"`
Year string `json:"year"`
ClusterId string `json:"cluster_id"`
NumCite string `json:"num_cite"`
NumVer string `json:"num_ver"`
InfoId string `json:"info_id"`
Link *Link `json:"link"`
Title *Title `json:"title"`
Year string `json:"year"`
ClusterId string `json:"cluster_id"`
NumCite string `json:"num_cite"`
NumVer string `json:"num_ver"`
InfoId string `json:"info_id"`
Link *Link `json:"link"`
BibTeX string `json:"bibtex"`
Author []string `json:"author"`
Journal string `json:"journal"`
Booktitle string `json:"booktitle"`
Volume string `json:"volume"`
Number string `json:"number"`
Pages string `json:"pages"`
Publisher string `json:"publisher"`
}

// Title is an attribute of Article.
Expand Down Expand Up @@ -46,20 +54,7 @@ func newArticle() *Article {

// String provides a pretty print.
func (a *Article) String() string {
ret := "[Title]\n"
ret += fmt.Sprintf(" Name: %v\n", a.Title.Name)
ret += fmt.Sprintf(" Url: %v\n", a.Title.Url)
ret += fmt.Sprintf("[Year]\n %v\n", a.Year)
ret += fmt.Sprintf("[ClusterId]\n %v\n", a.ClusterId)
ret += fmt.Sprintf("[NumCite]\n %v\n", a.NumCite)
ret += fmt.Sprintf("[NumVer]\n %v\n", a.NumVer)
ret += fmt.Sprintf("[InfoId]\n %v\n", a.InfoId)
ret += "[Link]\n"
ret += fmt.Sprintf(" Name: %v\n", a.Link.Name)
ret += fmt.Sprintf(" Url: %v\n", a.Link.Url)
ret += fmt.Sprintf(" Format: %v", a.Link.Format)

return ret
return pp.Sprint(a)
}

// Json provides JSON formatted Article.
Expand Down
28 changes: 9 additions & 19 deletions article_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ func init() {
Url: "http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf",
Format: "PDF",
},
BibTeX: "@inproceedings{martens2010deep, title={Deep learning via Hessian-free optimization}, author={Martens, James}, booktitle={Proceedings of the 27th International Conference on Machine Learning (ICML-10)}, pages={735--742}, year={2010}}",
Author: []string{"Martens, James"},
Journal: "",
Booktitle: "Proceedings of the 27th International Conference on Machine Learning (ICML-10)",
Volume: "",
Number: "",
Pages: "735--742",
Publisher: "",
}
}

Expand Down Expand Up @@ -68,30 +76,12 @@ func TestNewArticle(t *testing.T) {

func ExampleString() {
fmt.Println(article)
// Output:
// [Title]
// Name: Deep learning via Hessian-free optimization
// Url: http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf
// [Year]
// 2010
// [ClusterId]
// 15502119379559163003
// [NumCite]
// 260
// [NumVer]
// 9
// [InfoId]
// e6RSJHGXItcJ
// [Link]
// Name: wustl.edu
// Url: http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf
// Format: PDF
}

func ExampleJson() {
fmt.Println(article.Json())
// Output:
// {"title":{"name":"Deep learning via Hessian-free optimization","url":"http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf"},"year":"2010","cluster_id":"15502119379559163003","num_cite":"260","num_ver":"9","info_id":"e6RSJHGXItcJ","link":{"name":"wustl.edu","url":"http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf","format":"PDF"}}
// {"title":{"name":"Deep learning via Hessian-free optimization","url":"http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf"},"year":"2010","cluster_id":"15502119379559163003","num_cite":"260","num_ver":"9","info_id":"e6RSJHGXItcJ","link":{"name":"wustl.edu","url":"http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_Martens10.pdf","format":"PDF"},"bibtex":"@inproceedings{martens2010deep, title={Deep learning via Hessian-free optimization}, author={Martens, James}, booktitle={Proceedings of the 27th International Conference on Machine Learning (ICML-10)}, pages={735--742}, year={2010}}","author":["Martens, James"],"journal":"","booktitle":"Proceedings of the 27th International Conference on Machine Learning (ICML-10)","volume":"","number":"","pages":"735--742","publisher":""}
}

func TestIsValid(t *testing.T) {
Expand Down
Loading

0 comments on commit e1573ef

Please sign in to comment.