Skip to content

Commit

Permalink
Download whole manga (#15)
Browse files Browse the repository at this point in the history
* Download whole manga

Implement full comic download for all sites

* Document exported functions, use better return values, rename util function

* Add --all flag to download whole manga

* Do not use set for uniq urls

* Update readme with -all usage
  • Loading branch information
jphager2 authored and Girbons committed May 2, 2019
1 parent 852a881 commit 03c3e70
Show file tree
Hide file tree
Showing 20 changed files with 307 additions and 152 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@

# path where comics are downloaded
comics/

# binary
comics-downloader
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Put the script under a folder.
You can invoke the `--help`:

```
-all
Download all issues of the Comic or Comics
-country string
Set the country to retrieve a manga, Used by MangaRock
-format string
Expand Down Expand Up @@ -72,6 +74,16 @@ example:
./comics-downloader -url=[your url] -format=epub
```

### Download the whole comic

Provide the comic url and use the `-all` flag. The url provided can be any issue of the comic, or the main comic page url.

example:

```bash
./comics-downloader -url=[your url] -all
```

## Config file

To avoid specifying the output format every time, you can create a `config.yaml` file in the same path as the executable.
Expand Down
13 changes: 8 additions & 5 deletions cmd/app/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func init() {
}

// Run will run the downloader app
func Run(link, format, country string) {
func Run(link string, format string, country string, all bool) {
conf := new(config.ComicConfig)
if err := conf.LoadConfig(); err != nil {
log.Warning(err)
Expand All @@ -26,6 +26,7 @@ func Run(link, format, country string) {
log.Error("url parameter is required")
}

// TODO: This doesn't seem necessary
if !strings.HasSuffix(link, ",") {
link = link + ","
}
Expand All @@ -44,15 +45,17 @@ func Run(link, format, country string) {
}).Info("Downloading...")
// in case the link is supported
// setup the right strategy to parse a comic
comic, err := sites.LoadComicFromSource(conf, source, u, country, format)
collection, err := sites.LoadComicFromSource(conf, source, u, country, format, all)
if err != nil {
log.Error(err)
continue
}

err = comic.MakeComic()
if err != nil {
log.Error(err)
for _, comic := range collection {
err = comic.MakeComic()
if err != nil {
log.Error(err)
}
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ func main() {
url := flag.String("url", "", "Comic URL or Comic URLS by separating each site with a comma without the use of spaces")
format := flag.String("format", "pdf", "Comic format output, supported formats are pdf,epub,cbr,cbz")
country := flag.String("country", "", "Set the country to retrieve a manga, Used by MangaRock")
all := flag.Bool("all", false, "Download all issues of the Comic or Comics")
flag.Parse()

// is this the best way?
Expand All @@ -25,5 +26,5 @@ func main() {
}
}

app.Run(*url, *format, *country)
app.Run(*url, *format, *country, *all)
}
6 changes: 0 additions & 6 deletions pkg/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ type Comic struct {
Config *config.ComicConfig
}

// SplitURL return the url splitted by "/"
func (comic *Comic) SplitURL() []string {
return strings.Split(comic.URLSource, "/")
}

// getConfigValues will try to get some
func (comic *Comic) readConfigValues() {
// retrieve values from config file
Expand Down Expand Up @@ -93,7 +88,6 @@ func (comic *Comic) retrieveImageFromResponse(response *http.Response) (io.Reade
}

return content, tp, err

}

// makeEPUB create the epub file
Expand Down
8 changes: 0 additions & 8 deletions pkg/core/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,6 @@ func TestNewComic(t *testing.T) {
assert.Equal(t, 1, len(comic.Links))
}

func TestSplitURL(t *testing.T) {
comic := new(Comic)
comic.URLSource = "https://www.mangareader.net/naruto/1/"

assert.Equal(t, comic.SplitURL()[3], "naruto")
assert.Equal(t, comic.SplitURL()[4], "1")
}

func TestMakeComicPDF(t *testing.T) {
comic := new(Comic)

Expand Down
37 changes: 35 additions & 2 deletions pkg/sites/comicextra/comicextra.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,47 @@ func retrieveImageLinks(comic *core.Comic) ([]string, error) {
}

return links, err
}

// isSingleIssue reports whether url refers to one issue rather than to a
// comic's overview page. It returns true unless element 3 of
// util.TrimAndSplitURL(url) is "comic" (presumably the first path segment
// after the host; confirm against util.TrimAndSplitURL).
//
// The element is indexed without a length check, so a url that yields fewer
// than four elements panics.
func isSingleIssue(url string) bool {
	segments := util.TrimAndSplitURL(url)
	marker := segments[3]

	return marker != "comic"
}

// RetrieveIssueLinks gets a slice of urls for all issues in a comic.
//
// When url is a single issue and all is false, the url is returned unchanged
// as the only element. When url is a single issue and all is true, it is first
// replaced by the comic's overview page
// ("https://www.comicextra.com/comic/<name>"). The overview page is then
// fetched and every anchor whose href contains "/<name>/" is collected with
// "/full" appended. Links that fail util.IsURLValid are dropped and duplicates
// are removed, keeping first-seen order.
//
// An error is returned only when fetching the overview page fails.
func RetrieveIssueLinks(url string, all bool) ([]string, error) {
	if isSingleIssue(url) {
		if !all {
			return []string{url}, nil
		}
		// Swap the issue url for the overview page so every issue can be listed.
		url = "https://www.comicextra.com/comic/" + util.TrimAndSplitURL(url)[3]
	}

	name := util.TrimAndSplitURL(url)[4]

	response, err := soup.Get(url)
	if err != nil {
		return nil, err
	}

	// The name is escaped so a slug containing regexp metacharacters can neither
	// change the pattern nor make MustCompile panic. The tail uses [^"]+ rather
	// than a greedy .+ so the capture stops at the href's closing quote instead
	// of running into later attributes on the same line.
	re := regexp.MustCompile(`<a[^>]+href="([^">]+/` + regexp.QuoteMeta(name) + `/[^"]+)"`)

	var links []string
	for _, match := range re.FindAllStringSubmatch(response, -1) {
		link := match[1] + "/full"
		if util.IsURLValid(link) && !util.IsValueInSlice(link, links) {
			links = append(links, link)
		}
	}

	return links, nil
}

// Initialize will initialize the comic based
// on comicextra.com
func Initialize(comic *core.Comic) error {
comic.Name = comic.SplitURL()[3]
comic.IssueNumber = comic.SplitURL()[4]
parts := util.TrimAndSplitURL(comic.URLSource)
comic.Name = parts[3]
comic.IssueNumber = parts[4]

links, err := retrieveImageLinks(comic)
comic.Links = links
Expand Down
7 changes: 7 additions & 0 deletions pkg/sites/comicextra/comicextra_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ func TestComicExtraSetup(t *testing.T) {
assert.Equal(t, "chapter-58", comic.IssueNumber)
assert.Equal(t, 24, len(comic.Links))
}

// TestRetrieveIssueLinks asks for the issue links of a comic overview page and
// expects exactly 100 of them with no error.
// NOTE(review): RetrieveIssueLinks fetches the page via soup.Get, so this
// presumably needs network access and depends on the live site's content.
func TestRetrieveIssueLinks(t *testing.T) {
	const comicURL = "https://www.comicextra.com/comic/100-bullets"

	links, err := RetrieveIssueLinks(comicURL, false)

	assert.Nil(t, err)
	assert.Equal(t, 100, len(links))
}
65 changes: 46 additions & 19 deletions pkg/sites/loader.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package sites

import (
"errors"
"fmt"

"github.com/Girbons/comics-downloader/pkg/config"
Expand All @@ -11,36 +12,62 @@ import (
"github.com/Girbons/comics-downloader/pkg/sites/mangatown"
)

// LoadComicFromSource will return a `comic` instance initialized based on the source
func LoadComicFromSource(conf *config.ComicConfig, source, url, country, format string) (*core.Comic, error) {
func initializeCollection(initializer func(*core.Comic) error, url string, issues []string, conf *config.ComicConfig, format string, source string, options map[string]string) ([]*core.Comic, error) {
var collection []*core.Comic
var err error

comic := &core.Comic{
Config: conf,
URLSource: url,
Source: source,
Format: format,
if len(issues) == 0 {
return collection, errors.New("No issues found.")
}

for _, url := range issues {
comic := &core.Comic{
URLSource: url,
Config: conf,
Source: source,
Format: format,
Options: options,
}
if err = initializer(comic); err != nil {
return collection, err
}
collection = append(collection, comic)
}

return collection, nil
}

// LoadComicFromSource will return a `comic` instance initialized based on the source
func LoadComicFromSource(conf *config.ComicConfig, source, url, country, format string, all bool) ([]*core.Comic, error) {
var err error
var collection []*core.Comic
var initializer func(*core.Comic) error
var issues []string

options := map[string]string{"country": country}

switch source {
case "www.comicextra.com":
err = comicextra.Initialize(comic)
issues, err = comicextra.RetrieveIssueLinks(url, all)
initializer = comicextra.Initialize
case "mangarock.com":
if country != "" {
options := map[string]string{"country": country}
comic.Options = options
}
err = mangarock.Initialize(comic)
issues, err = mangarock.RetrieveIssueLinks(url, all, options)
initializer = mangarock.Initialize
case "www.mangareader.net":
err = mangareader.Initialize(comic)
issues, err = mangareader.RetrieveIssueLinks(url, all)
initializer = mangareader.Initialize
case "www.mangatown.com":
err = mangatown.Initialize(comic)
case "www.mangahere.cc":
err = fmt.Errorf("mangahere is currently disabled")
//sites.SetupMangaHere(comic)
issues, err = mangatown.RetrieveIssueLinks(url, all)
initializer = mangatown.Initialize
default:
err = fmt.Errorf("It was not possible to determine the source")
}

return comic, err
if err != nil {
return collection, err
}

collection, err = initializeCollection(initializer, url, issues, conf, format, source, options)

return collection, err
}
60 changes: 46 additions & 14 deletions pkg/sites/loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,82 @@ import (
"github.com/stretchr/testify/assert"
)

// TestSiteLoaderMangatown loads a single mangatown chapter through
// LoadComicFromSource and checks the fields of the one comic it returns.
func TestSiteLoaderMangatown(t *testing.T) {
	url := "https://www.mangatown.com/manga/naruto/v63/c693/"
	conf := new(config.ComicConfig)

	collection, err := LoadComicFromSource(conf, "www.mangatown.com", url, "", "", false)

	// Stop on failure: indexing an empty collection below would panic and hide
	// the assertion message that explains what went wrong.
	if !assert.Nil(t, err) || !assert.Len(t, collection, 1) {
		return
	}

	comic := collection[0]

	assert.Equal(t, "www.mangatown.com", comic.Source)
	assert.Equal(t, url, comic.URLSource)
	assert.Equal(t, "naruto", comic.Name)
	assert.Equal(t, "c693", comic.IssueNumber)
	assert.Equal(t, 20, len(comic.Links))
}

func TestSiteLoaderMangarock(t *testing.T) {
url := "https://mangarock.com/manga/mrs-serie-35593/chapter/mrs-chapter-100051049"
conf := new(config.ComicConfig)

comic, err := LoadComicFromSource(conf, "mangarock.com", url, "italy", "")
collection, err := LoadComicFromSource(conf, "mangarock.com", url, "italy", "", false)

assert.Nil(t, err)
assert.Equal(t, len(collection), 1)

comic := collection[0]

assert.Equal(t, "mangarock.com", comic.Source)
assert.Equal(t, url, comic.URLSource)
assert.Equal(t, "Boruto: Naruto Next Generations", comic.Name)
assert.Equal(t, "Vol.4 Chapter 14: Teamwork...!!", comic.IssueNumber)
assert.Equal(t, 49, len(comic.Links))
}

// TestSiteLoaderMangareader loads a single mangareader chapter through
// LoadComicFromSource and checks the fields of the one comic it returns.
func TestSiteLoaderMangareader(t *testing.T) {
	url := "https://www.mangareader.net/naruto/700"
	conf := new(config.ComicConfig)

	collection, err := LoadComicFromSource(conf, "www.mangareader.net", url, "", "", false)

	// Stop on failure: indexing an empty collection below would panic and hide
	// the assertion message that explains what went wrong.
	if !assert.Nil(t, err) || !assert.Len(t, collection, 1) {
		return
	}

	comic := collection[0]

	assert.Equal(t, "www.mangareader.net", comic.Source)
	assert.Equal(t, url, comic.URLSource)
	assert.Equal(t, "naruto", comic.Name)
	assert.Equal(t, "700", comic.IssueNumber)
	assert.Equal(t, 23, len(comic.Links))
}

func TestSiteLoaderComicExtra(t *testing.T) {
url := "https://www.comicextra.com/daredevil-2016/chapter-600/full"
conf := new(config.ComicConfig)
comic, err := LoadComicFromSource(conf, "www.comicextra.com", url, "", "")
collection, err := LoadComicFromSource(conf, "www.comicextra.com", url, "", "", false)

assert.Nil(t, err)
assert.Equal(t, len(collection), 1)

comic := collection[0]

assert.Equal(t, "www.comicextra.com", comic.Source)
assert.Equal(t, url, comic.URLSource)
assert.Equal(t, "daredevil-2016", comic.Name)
assert.Equal(t, "chapter-600", comic.IssueNumber)
assert.Equal(t, 43, len(comic.Links))
}

func TestSiteLoaderMangahereIsDisabled(t *testing.T) {
url := "http://www.mangahere.cc/manga/shingeki_no_kyojin_before_the_fall/c048/"
conf := new(config.ComicConfig)

_, err := LoadComicFromSource(conf, "www.mangahere.cc", url, "", "")

assert.EqualError(t, err, "mangahere is currently disabled")
}

func TestLoaderUnknownSource(t *testing.T) {
url := "http://example.com"
conf := new(config.ComicConfig)

comic, err := LoadComicFromSource(conf, "example.com", url, "", "")
collection, err := LoadComicFromSource(conf, "example.com", url, "", "", false)

if assert.NotNil(t, err) {
assert.Equal(t, fmt.Errorf("It was not possible to determine the source"), err)
}
assert.Equal(t, "example.com", comic.Source)
assert.Equal(t, len(collection), 0)
}
Loading

0 comments on commit 03c3e70

Please sign in to comment.