diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c2658d7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +node_modules/ diff --git a/.jscsrc b/.jscsrc new file mode 100644 index 0000000..c29b41b --- /dev/null +++ b/.jscsrc @@ -0,0 +1,60 @@ +{ + "disallowEmptyBlocks": true, + "disallowImplicitTypeConversion": ["binary", "boolean", "numeric", "string"], + "disallowKeywords": ["delete", "with"], + "disallowKeywordsOnNewLine": ["catch", "else", "finally"], + "disallowMixedSpacesAndTabs": true, + "disallowMultipleLineBreaks": true, + "disallowMultipleLineStrings": true, + "disallowMultipleVarDecl": true, + "disallowNewlineBeforeBlockStatements": true, + "disallowPaddingNewlinesInBlocks": true, + "disallowQuotedKeysInObjects": "allButReserved", + "disallowSpaceAfterObjectKeys": true, + "disallowSpaceAfterPrefixUnaryOperators": true, + "disallowSpaceBeforePostfixUnaryOperators": true, + "disallowSpacesInCallExpression": true, + "disallowSpacesInFunction": {"beforeOpeningRoundBrace": true}, + "disallowSpacesInsideArrayBrackets": true, + "disallowSpacesInsideObjectBrackets": true, + "disallowSpacesInsideParentheses": true, + "disallowTrailingComma": true, + "disallowTrailingWhitespace": true, + "disallowYodaConditions": true, + "jsDoc": { + "checkAnnotations": "closurecompiler", + "checkParamNames": true, + "checkRedundantAccess": true, + "checkRedundantParams": true, + "checkReturnTypes": true, + "checkTypes": "strictNativeCase", + "enforceExistence": true, + "leadingUnderscoreAccess": true, + "requireParamTypes": true, + "requireReturnTypes": true + }, + "maximumLineLength": 80, + "plugins": ["jscs-jsdoc"], + "requireBlocksOnNewline": true, + "requireCamelCaseOrUpperCaseIdentifiers": true, + "requireCapitalizedConstructors": true, + "requireCommaBeforeLineBreak": true, + "requireDotNotation": true, + "requireFunctionDeclarations": true, + "requireLineFeedAtFileEnd": true, + "requireOperatorBeforeLineBreak": true, + "requireParenthesesAroundIIFE": 
true, + "requireSpaceAfterBinaryOperators": true, + "requireSpaceAfterKeywords": ["case", "catch", "do", "else", "for", "if", "return", "switch", "try", "typeof", "void", "while", "with"], + "requireSpaceAfterLineComment": true, + "requireSpaceBeforeBinaryOperators": true, + "requireSpaceBeforeBlockStatements": true, + "requireSpaceBeforeObjectValues": true, + "requireSpacesInConditionalExpression": true, + "requireSpacesInFunction": {"beforeOpeningCurlyBrace": true}, + "safeContextKeyword": ["that"], + "validateIndentation": 2, + "validateParameterSeparator": ", ", + "validateQuoteMarks": "'", + "validateLineBreaks": "LF" +} diff --git a/.jshintrc b/.jshintrc new file mode 100644 index 0000000..8954797 --- /dev/null +++ b/.jshintrc @@ -0,0 +1,79 @@ +{ + "bitwise" : false, + "camelcase" : false, + "curly" : false, + "eqeqeq" : true, + "es3" : false, + "forin" : true, + "freeze" : true, + "immed" : true, + "indent" : 4, + "latedef" : "nofunc", + "newcap" : true, + "noarg" : true, + "noempty" : true, + "nonbsp" : true, + "nonew" : true, + "plusplus" : true, + "quotmark" : "single", + "undef" : true, + "unused" : true, + "singleGroups" : true, + "strict" : true, + "maxparams" : 5, + "maxdepth" : 5, + "maxstatements": 25, + "maxcomplexity": 5, + "maxlen" : 80, + + "asi" : false, + "boss" : false, + "debug" : false, + "eqnull" : false, + "evil" : false, + "expr" : false, + "esnext" : false, + "funcscope" : false, + "globalstrict" : false, + "iterator" : false, + "lastsemic" : false, + "laxbreak" : false, + "laxcomma" : false, + "loopfunc" : false, + "maxerr" : 50, + "moz" : false, + "multistr" : false, + "notypeof" : false, + "noyield" : false, + "proto" : false, + "scripturl" : false, + "scope" : false, + "shadow" : false, + "sub" : false, + "supernew" : false, + "validthis" : false, + "withstmt" : false, + + "browser" : false, + "browserify" : false, + "couch" : false, + "devel" : false, + "dojo" : false, + "jasmine" : false, + "jquery" : false, + "mootools" : false, 
+ "mocha" : false, + "node" : true, + "nonstandard" : false, + "phantom" : false, + "prototypejs" : false, + "qunit" : false, + "rhino" : false, + "shelljs" : false, + "typed" : false, + "worker" : false, + "wsh" : false, + "yui" : false, + + "globals" : [] +} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..106aee2 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Roel van Uden + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/README.md b/README.md index 9bfe403..df6828d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,44 @@ -# crunchyroll.js -CrunchyRoll.js is capable of downloading anime from the popular CrunchyRoll streaming site. +# CrunchyRoll.js + +CrunchyRoll.js is capable of downloading anime from the popular CrunchyRoll +streaming site. Each episode is stored with the original video encoding +(usually H.264, MP4) and the configured subtitle format (ASS or SRT). The two +output files can optionally be muxed into a single MKV file. 
+ +## Motivation + +I **love** *CrunchyRoll*. They have been working hard to provide an amazing service to +*anime* fans and provide the *absolute best* way to watch *anime* series in a legal +way. As they offer a streaming service, they do not offer files to be downloaded +and enjoyed while offline (or travelling). This is understandable from a +business perspective, but extremely annoying for the end-user. I put this +application together to enable the files to be stored and enjoyed while offline. +Please do not download as much as you can and stop your premium subscription; +without our financial support, they cannot give us such a great service! +**Do not share downloaded files and delete them if you ever stop your +premium subscription!** + +## Status + +### Implemented + +* Subtitle decoding. +* Subtitle converter for SRT subtitle output. +* Video streaming. +* Episode page scraping with subtitle saving and video streaming. + +### Pending Implementation + +* Detect and write the appropriate video extension (instead of hard-coded mp4). +* Improve SRT support for i, b and u. +* Add ASS support. +* Add muxing (MP4+ASS=MKV). +* Add series API to download an entire series rather than per-episode. +* Add batch-mode to queue a bunch of series and do incremental downloads. +* Add authentication to the entire stack to support premium content. +* Add CLI interface with all the options. +* Enjoy beautiful anime series from disk when the internet is down. + +## Work In Progress + +Open an issue or e-mail me directly. I'd be happy to answer your questions. diff --git a/app.js b/app.js new file mode 100644 index 0000000..3db8b42 --- /dev/null +++ b/app.js @@ -0,0 +1,24 @@ +'use strict'; + +// TODO: Improve SRT support for i, b and u. +// TODO: Add ASS support. +// TODO: Add muxing (MP4+ASS=MKV). +// TODO: Add series API to download an entire series rather than per-episode. +// TODO: Add batch-mode to queue a bunch of series and do incremental downloads. 
+// TODO: Add authentication to the entire stack to support premium content. +// TODO: Add CLI interface with all the options. + +var config = { + format: undefined, // defaults to srt + path: undefined, // defaults to process.cwd() + tag: undefined, // defaults to CrunchyRoll +}; + +var episode = require('./src/episode'); +episode( + config, + 'http://www.crunchyroll.com/fairy-tail/episode-1-the-dragon-king-652167', + function(err) { + if (err) return console.log(err); + console.log('All done!'); +}); diff --git a/bin/rtmpdump.exe b/bin/rtmpdump.exe new file mode 100644 index 0000000..f0c3273 Binary files /dev/null and b/bin/rtmpdump.exe differ diff --git a/package.json b/package.json new file mode 100644 index 0000000..5574486 --- /dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "crunchyroll", + "version": "1.0.0", + "description": "", + "main": "app.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "author": "", + "license": "ISC", + "dependencies": { + "big-integer": "^1.4.1", + "cheerio": "^0.18.0", + "request": "^2.51.0", + "xml2js": "^0.4.4" + } +} diff --git a/src/episode.js b/src/episode.js new file mode 100644 index 0000000..a3b3dfe --- /dev/null +++ b/src/episode.js @@ -0,0 +1,137 @@ +'use strict'; +var cheerio = require('cheerio'); +var fs = require('fs'); +var request = require('request'); +var path = require('path'); +var subtitle = require('./subtitle'); +var video = require('./video'); +var xml2js = require('xml2js'); + +/** + * Streams the episode video and subtitle to disk. + * @param {Object} config + * @param {string} address + * @param {function(Error)} done + */ +module.exports = function (config, address, done) { + _page(address, function(err, page) { + if (err) return done(err); + _player(address, page.id, function(err, player) { + if (err) return done(err); + _download(config, page, player, done); + }); + }); +}; + +/** + * Downloads the subtitle and video. 
+ * @param {Object} config Reads the optional path and tag settings. + * @param {Object} page Page data; reads episode and series. + * @param {Object} player Player data passed to _subtitle and _video. + * @param {function(Error)} done Invoked after the video step or on error. + */ +function _download(config, page, player, done) { + var tag = config.tag || 'CrunchyRoll'; + var episode = (page.episode < 10 ? '0' : '') + page.episode; + var fileName = page.series + ' - ' + episode + ' [' + tag + ']'; + var filePath = path.join(config.path || process.cwd(), fileName); + _subtitle(config, player, filePath, function(err) { + if (err) return done(err); + _video(config, page, player, filePath, done); + }); +} + +/** + * Requests the page and scrapes the id, episode, series and swf from it. + * @private + * @param {string} address Must end in digits, which become the id. + * @param {function(Error, Object=)} done Receives id, episode, series, swf. + */ +function _page(address, done) { + var id = parseInt((address.match(/[0-9]+$/) || [0])[0], 10); + if (!id) return done(new Error('Invalid address.')); + request.get(address, function(err, res, body) { + if (err) return done(err); + var $ = cheerio.load(body); + var swf = /^([^?]+)/.exec($('link[rel=video_src]').attr('href')); + var title = /Watch ([\w\W]+) Episode ([0-9]+)/.exec($('title').text()); + if (!swf || !title) return done(new Error('Invalid page.')); + done(undefined, { + id: id, + episode: parseInt(title[2], 10), + series: title[1], + swf: swf[1] + }); + }); +} + +/** + * Requests the player data and scrapes the subtitle and video data. 
+ * @private + * @param {string} address Page address; its scheme and host are reused. + * @param {number} id Sent as the media_id query value. + * @param {function(Error, Object=)} done Receives subtitle and video data. + */ +function _player(address, id, done) { + var url = address.match(/^(https?:\/\/[^\/]+)/); + if (!url) return done(new Error('Invalid address.')); + request.post({ + form: {current_page: address}, + url: url[1] + '/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=' + id + }, function(err, res, xml) { + if (err) return done(err); + xml2js.parseString(xml, { + explicitArray: false, + explicitRoot: false + }, function(err, player) { + if (err) return done(err); + done(undefined, { + subtitle: { + id: player['default:preload'].subtitle.$.id, + iv: player['default:preload'].subtitle.iv, + data: player['default:preload'].subtitle.data + }, + video: { + file: player['default:preload'].stream_info.file, + host: player['default:preload'].stream_info.host + } + }); + }); + }); +} + +/** + * Decodes the subtitle, converts it and writes the result to disk. + * @param {Object} config Reads format; unknown values fall back to srt. + * @param {Object} player Player data; reads subtitle id, iv and data. + * @param {string} filePath Output path without the file extension. + * @param {function(Error)} done Invoked after the write or on error. + */ +function _subtitle(config, player, filePath, done) { + var contents = player.subtitle; + subtitle.decode(contents.id, contents.iv, contents.data, function(err, data) { + if (err) return done(err); + var format = subtitle.formats[config.format] ? config.format : 'srt'; + subtitle.formats[format](data, function(err, decodedSubtitle) { + if (err) return done(err); + fs.writeFile(filePath + '.' + format, decodedSubtitle, done); + }); + }); +} + +/** +* Streams the video to disk. 
+* @param {Object} config Currently unused. +* @param {Object} page Page data; reads swf. +* @param {Object} player Player data; reads video host and file. +* @param {string} filePath Output path; '.mp4' is appended. +* @param {function(Error)} done Handed to video.stream as its callback. +*/ +function _video(config, page, player, filePath, done) { + video.stream( + player.video.host, + player.video.file, + page.swf, + filePath + '.mp4', + done); +} diff --git a/src/subtitle/decode.js b/src/subtitle/decode.js new file mode 100644 index 0000000..e3728b7 --- /dev/null +++ b/src/subtitle/decode.js @@ -0,0 +1,98 @@ +'use strict'; +var crypto = require('crypto'); +var bigInt = require('big-integer'); +var zlib = require('zlib'); + +/** + * Decrypts the data and then inflates the decrypted result. + * @param {number} id Subtitle id used to derive the key. + * @param {(Buffer|string)} iv Buffer, or a base64 string. + * @param {(Buffer|string)} data Buffer, or a base64 string. + * @param {function(Error, Buffer=)} done Receives the inflated data. + */ +module.exports = function(id, iv, data, done) { + try { + _decompress(_decrypt(id, iv, data), done); + } catch(e) { + done(e); + } +}; + +/** + * Decrypts the data with AES-256-CBC and automatic padding disabled. + * @private + * @param {number} id Subtitle id used to derive the key. + * @param {(Buffer|string)} iv Buffer, or a base64 string. + * @param {(Buffer|string)} data Buffer, or a base64 string. + * @return {Buffer} The decrypted bytes. + */ +function _decrypt(id, iv, data) { + if (typeof iv === 'string') iv = new Buffer(iv, 'base64'); + if (typeof data === 'string') data = new Buffer(data, 'base64'); + var decipher = crypto.createDecipheriv('aes-256-cbc', _key(id), iv); + decipher.setAutoPadding(false); + return Buffer.concat([decipher.update(data), decipher.final()]); +} + +/** + * Inflates the data; if inflate throws, the input is passed on as is. + * @private + * @param {Buffer} data + * @param {function(Error, Buffer=)} done + */ +function _decompress(data, done) { + try { + zlib.inflate(data, done); + } catch(e) { + done(undefined, data); + } +} + +/** + * Generates a 32-byte key from a SHA-1 digest, zero-filled at the end. + * @private + * @param {number} subtitleId + * @return {Buffer} + */ +function _key(subtitleId) { + var hash = _secret(20, 97, 1, 2) + _magic(subtitleId); + var result = new Buffer(32); + result.fill(0); + crypto.createHash('sha1').update(hash).digest().copy(result); + return result; +} + +/** + * Generates a magic number. 
+ * @private + * @param {number} subtitleId + * @return {number} + */ +function _magic(subtitleId) { + var base = Math.floor(Math.sqrt(6.9) * Math.pow(2, 25)); + var hash = bigInt(base).xor(subtitleId); + var multipliedHash = bigInt(hash).multiply(32); + return bigInt(hash).xor(hash >> 3).xor(multipliedHash).toJSNumber(); +} + +/** + * Generates a secret string based on a Fibonacci sequence. + * @private + * @param {number} size + * @param {number} modulo + * @param {number} firstSeed + * @param {number} secondSeed + * @return {string} + */ +function _secret(size, modulo, firstSeed, secondSeed) { + var currentValue = firstSeed + secondSeed; + var previousValue = secondSeed; + var result = ''; + for (var i = 0; i < size; i += 1) { + var oldValue = currentValue; + result += String.fromCharCode(currentValue % modulo + 33); + currentValue += previousValue; + previousValue = oldValue; + } + return result; +} diff --git a/src/subtitle/formats/index.js b/src/subtitle/formats/index.js new file mode 100644 index 0000000..c944dd5 --- /dev/null +++ b/src/subtitle/formats/index.js @@ -0,0 +1,3 @@ +module.exports = { + srt: require('./srt') +}; diff --git a/src/subtitle/formats/srt.js b/src/subtitle/formats/srt.js new file mode 100644 index 0000000..75e1570 --- /dev/null +++ b/src/subtitle/formats/srt.js @@ -0,0 +1,81 @@ +'use strict'; +var xml2js = require('xml2js'); + +/** + * Converts an input buffer to a SRT subtitle. + * @param {Buffer|string} input + * @param {function(Error, string=)} done + */ +module.exports = function(input, done) { + if (typeof buffer !== 'string') input = input.toString(); + xml2js.parseString(input, { + explicitArray: false, + explicitRoot: false + }, function(err, xml) { + try { + if (err) return done(err); + done(undefined, xml.events.event.map(_convert).join('\n')); + } catch(err) { + done(err); + } + }); +}; + +/** + * Converts an event. 
+ * @param {Object} event Parsed event; reads start, end and text from $. + * @param {number} index Zero-based position; the output counts from one. + * @return {string} + */ +function _convert(event, index) { + var attributes = event.$; + return (index + 1) + '\n' + + _time(attributes.start) + ' --> ' + _time(attributes.end) + '\n' + + _text(attributes.text) + '\n'; +} + +/** + * Left-pads a value with zeros up to the given length. + * @param {string} value + * @param {number} length + * @return {string} + */ +function _prefix(value, length) { + while (value.length < length) value = '0' + value; + return value; +} + +/** + * Right-pads a value with zeros up to the given length. + * @param {string} value + * @param {number} length + * @return {string} + */ +function _suffix(value, length) { + while (value.length < length) value = value + '0'; + return value; +} + +/** + * Removes {...} groups and turns backslash-n sequences into line breaks. + * @param {string} text + * @return {string} + */ +function _text(text) { + return text.replace(/{[^}]+}/g, '').replace(/\\n/ig, '\n'); +} + +/** + * Converts an h:m:s.fraction time stamp to the hh:mm:ss,mmm form. + * @param {string} time + * @return {string} + */ +function _time(time) { + var all = time.match(/^([0-9]+):([0-9]+):([0-9]+)\.([0-9]+)$/); + if (!all) throw new Error('Invalid time.'); + var hours = _prefix(all[1], 2); + var minutes = _prefix(all[2], 2); + var seconds = _prefix(all[3], 2); + var milliseconds = _suffix(all[4], 3); + return hours + ':' + minutes + ':' + seconds + ',' + milliseconds; +} diff --git a/src/subtitle/index.js b/src/subtitle/index.js new file mode 100644 index 0000000..8b29f16 --- /dev/null +++ b/src/subtitle/index.js @@ -0,0 +1,4 @@ +module.exports = { + decode: require('./decode'), + formats: require('./formats') +}; diff --git a/src/video/index.js b/src/video/index.js new file mode 100644 index 0000000..82621e6 --- /dev/null +++ b/src/video/index.js @@ -0,0 +1,3 @@ +module.exports = { + stream: require('./stream') +}; diff --git a/src/video/stream.js b/src/video/stream.js new file mode 100644 index 0000000..bfeb3df --- /dev/null +++ b/src/video/stream.js @@ -0,0 +1,31 @@ +'use strict'; +var childProcess = require('child_process'); +var 
path = require('path'); +var os = require('os'); + +/** + * Streams the video to disk using rtmpdump. + * @param {string} rtmpUrl + * @param {string} inputPath + * @param {string} swfUrl + * @param {string} outputPath + * @param {function(Error)} done + */ +module.exports = function(rtmpUrl, inputPath, swfUrl, outputPath, done) { + childProcess.exec(_command() + ' ' + + '-r "' + rtmpUrl + '" ' + + '-y "' + inputPath + '" ' + + '-W "' + swfUrl + '" ' + + '-o "' + outputPath + '"', { + maxBuffer: Infinity + }, done); +}; + +/** + * Determines the command for the operating system. + * @returns {string} + */ +function _command() { + if (os.platform() !== 'win32') return 'rtmpdump'; + return path.join(__dirname, '../../bin/rtmpdump.exe'); +}