Add support for the NEW Way.

It started as just a way to fix subtitle downloads. It ended up as a
completely new way to get info about the video stream AND the subtitles.

Lots of things have changed on CR since the last major update, and on
some pages the old subtitle fetch fails. They changed the player on the
page from the old Flash one to an HTML-based one, and although most scraped
info is still valid, some of it no longer works on some new releases.

It should be more reliable, but there are some drawbacks. I'm currently
unable to select the resolution; it is 1080 by default. It will probably
not work for non-premium accounts, but, you know, I clearly ask you
to only use this tool if you have a premium account, so, well, I'm not
going to try to support non-premium accounts if it does not work.

Oh, and it adds the possibility to download subtitles in the languages of
your choice!

The old mechanism is still there as a fallback, but may be removed in the
future to clean up the code.
This commit is contained in:
Godzil 2020-04-27 22:48:24 +01:00
parent 8655874097
commit 64200a1da9
6 changed files with 292 additions and 84 deletions

View File

@ -379,6 +379,10 @@ function tasks(config: IConfigLine, batchPath: string, done: (err: Error, tasks?
}); });
} }
/**
 * Option coercion callback: turns a comma separated command line value into
 * a list (`frFR,enUS` becomes `['frFR', 'enUS']`).
 *
 * Whitespace around each entry is removed and empty entries are skipped, so
 * that `frFR, enUS` or a trailing comma does not yield values that can never
 * equal a language code in an exact comparison.
 *
 * @param value The raw option value.
 * @param dummyPrevious Not used by this function.
 * @returns The individual entries, in the order they were given.
 */
function commaSeparatedList(value: string, dummyPrevious: any): string[]
{
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}
/** /**
* Parses the arguments and returns a configuration. * Parses the arguments and returns a configuration.
*/ */
@ -396,6 +400,8 @@ function parse(args: string[]): IConfigLine
.option('-e, --episodes <s>', 'Episode list. Read documentation on how to use') .option('-e, --episodes <s>', 'Episode list. Read documentation on how to use')
// Settings // Settings
.option('-l, --crlang <s>', 'CR page language (valid: en, fr, es, it, pt, de, ru).') .option('-l, --crlang <s>', 'CR page language (valid: en, fr, es, it, pt, de, ru).')
.option('-s, --sublang <items>', 'Select the subtitle languages, multiple value separated by a comma ' +
'are accepted (like: frFR,enUS )', commaSeparatedList)
.option('-f, --format <s>', 'The subtitle format.', 'ass') .option('-f, --format <s>', 'The subtitle format.', 'ass')
.option('-o, --output <s>', 'The output path.') .option('-o, --output <s>', 'The output path.')
.option('-s, --series <s>', 'The series name override.') .option('-s, --series <s>', 'The series name override.')

View File

@ -5,6 +5,7 @@ import mkdirp = require('mkdirp');
import my_request = require('./my_request'); import my_request = require('./my_request');
import path = require('path'); import path = require('path');
import subtitle from './subtitle/index'; import subtitle from './subtitle/index';
import vlos from './vlos';
import video from './video/index'; import video from './video/index';
import xml2js = require('xml2js'); import xml2js = require('xml2js');
import log = require('./log'); import log = require('./log');
@ -21,6 +22,14 @@ export default function(config: IConfig, address: string, done: (err: Error, ign
return done(err, false); return done(err, false);
} }
if (page.media != null)
{
/* No player to scrape */
download(config, page, null, done);
}
else
{
/* The old way */
scrapePlayer(config, address, page.id, (errS, player) => scrapePlayer(config, address, page.id, (errS, player) =>
{ {
if (errS) if (errS)
@ -30,6 +39,7 @@ export default function(config: IConfig, address: string, done: (err: Error, ign
download(config, page, player, done); download(config, page, player, done);
}); });
}
}); });
} }
@ -115,7 +125,7 @@ function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, d
if (ret) if (ret)
{ {
log.dispEpisode(fileName, 'Fetching...', false); log.dispEpisode(fileName, 'Fetching...', false);
downloadSubtitle(config, player, filePath, (errDS) => downloadSubtitle(config, page, player, filePath, (errDS) =>
{ {
if (errDS) if (errDS)
{ {
@ -124,7 +134,8 @@ function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, d
} }
const now = Date.now(); const now = Date.now();
if (player.video.file !== undefined) if ( ((page.media === null) && (player.video.file !== undefined))
|| ((page.media !== null) /* Do they still create page in advance for unreleased episodes? */) )
{ {
log.dispEpisode(fileName, 'Fetching video...', false); log.dispEpisode(fileName, 'Fetching video...', false);
downloadVideo(config, page, player, filePath, (errDV) => downloadVideo(config, page, player, filePath, (errDV) =>
@ -140,10 +151,28 @@ function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, d
return complete(fileName, 'Finished!', now, done); return complete(fileName, 'Finished!', now, done);
} }
const isSubtited = Boolean(player.subtitle); let isSubtitled = true;
if (page.media === null)
{
isSubtitled = Boolean(player.subtitle);
}
else
{
if (page.media.subtitles.length === 0)
{
isSubtitled = false;
}
}
let videoExt = '.mp4';
if ( (page.media === null) && (player.video.mode === 'RTMP'))
{
videoExt = path.extname(player.video.file);
}
log.dispEpisode(fileName, 'Merging...', false); log.dispEpisode(fileName, 'Merging...', false);
video.merge(config, isSubtited, player.video.file, filePath, player.video.mode, config.verbose, (errVM) => video.merge(config, isSubtitled, videoExt, filePath, config.verbose, (errVM) =>
{ {
if (errVM) if (errVM)
{ {
@ -164,7 +193,7 @@ function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, d
} }
else else
{ {
log.dispEpisode(fileName, 'Error creating folder \'" + filePath + "\'...', true); log.dispEpisode(fileName, 'Error creating folder \'' + filePath + '\'...', true);
return done('Cannot create folder', false); return done('Cannot create folder', false);
} }
} }
@ -172,8 +201,55 @@ function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, d
/** /**
* Saves the subtitles to disk. * Saves the subtitles to disk.
*/ */
function downloadSubtitle(config: IConfig, player: IEpisodePlayer, filePath: string, done: (err?: Error | string) => void) function downloadSubtitle(config: IConfig, page: IEpisodePage, player: IEpisodePlayer,
filePath: string, done: (err?: Error | string) => void)
{ {
if (page.media !== null)
{
const subs = page.media.subtitles;
if (subs.length === 0)
{
/* No downloadable subtitles */
console.warn('Can\'t find subtitle ?!');
return done();
}
let i;
let j;
/* Find a proper subtitles */
for (j = 0; j < config.sublang.length; j++)
{
const reqSubLang = config.sublang[j];
for (i = 0; i < subs.length; i++)
{
const curSub = subs[i];
if (curSub.format === 'ass' && curSub.language === reqSubLang)
{
my_request.get(config, curSub.url, (err, result) =>
{
if (err)
{
log.error('An error occured while fetching subtitles...');
return done(err);
}
fs.writeFile(filePath + '.ass', '\ufeff' + result, done);
});
/* Break from the first loop */
j = config.sublang.length;
break;
}
}
}
if (i >= subs.length)
{
done('Cannot find subtitles with requested language(s)');
}
}
else
{
const enc = player.subtitle; const enc = player.subtitle;
if (!enc) if (!enc)
@ -185,6 +261,7 @@ function downloadSubtitle(config: IConfig, player: IEpisodePlayer, filePath: str
{ {
if (errSD) if (errSD)
{ {
log.error('An error occured while getting subtitles...');
return done(errSD); return done(errSD);
} }
@ -206,16 +283,43 @@ function downloadSubtitle(config: IConfig, player: IEpisodePlayer, filePath: str
fs.writeFile(filePath + '.' + format, '\ufeff' + decodedSubtitle, done); fs.writeFile(filePath + '.' + format, '\ufeff' + decodedSubtitle, done);
}); });
}); });
}
} }
/** /**
* Streams the video to disk. * Streams the video to disk.
*/ */
function downloadVideo(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, function downloadVideo(config: IConfig, page: IEpisodePage, player: IEpisodePlayer,
filePath: string, done: (err: Error) => void) filePath: string, done: (err: any) => void)
{ {
if (player == null)
{
/* new way */
const streams = page.media.streams;
let i;
/* Find a proper subtitles */
for (i = 0; i < streams.length; i++)
{
if (streams[i].format === 'vo_adaptive_hls' && streams[i].audio_lang === 'jaJP' &&
streams[i].hardsub_lang === null)
{
video.stream('', streams[i].url, '', filePath,
'mp4', 'HLS', config.verbose, done);
break;
}
}
if (i >= streams.length)
{
done('Cannot find a valid stream');
}
}
else
{
/* Old way */
video.stream(player.video.host, player.video.file, page.swf, filePath, video.stream(player.video.host, player.video.file, page.swf, filePath,
path.extname(player.video.file), player.video.mode, config.verbose, done); path.extname(player.video.file), player.video.mode, config.verbose, done);
}
} }
/** /**
@ -277,25 +381,44 @@ function scrapePage(config: IConfig, address: string, done: (err: Error, page?:
} }
const $ = cheerio.load(result); const $ = cheerio.load(result);
/* First check if we have the new player */
const vlosScript = $('#vilos-iframe-container');
if (vlosScript)
{
const pageMetadata = JSON.parse($('script[type="application/ld+json"]')[0].children[0].data);
const divScript = $('div[id="showmedia_video_box_wide"]');
const scripts = divScript.find('script').toArray();
const script = scripts[2].children[0].data;
let seasonNumber = '1';
let seasonTitle = '';
if (pageMetadata.partOfSeason)
{
seasonNumber = pageMetadata.partOfSeason.seasonNumber;
seasonTitle = pageMetadata.partOfSeason.name;
}
done(null, vlos.getMedia(script, seasonTitle, seasonNumber));
}
else
{
/* Use the old way */
const swf = /^([^?]+)/.exec($('link[rel=video_src]').attr('href')); const swf = /^([^?]+)/.exec($('link[rel=video_src]').attr('href'));
const regexp = /\s*([^\n\r\t\f]+)\n?\s*[^0-9]*([0-9][\-0-9.]*)?,?\n?\s\s*[^0-9]*((PV )?[S0-9][P0-9.]*[a-fA-F]?)/; const regexp = /\s*([^\n\r\t\f]+)\n?\s*[^0-9]*([0-9][\-0-9.]*)?,?\n?\s\s*[^0-9]*((PV )?[S0-9][P0-9.]*[a-fA-F]?)/;
const look = $('#showmedia_about_media').text();
const seasonTitle = $('span[itemprop="title"]').text(); const seasonTitle = $('span[itemprop="title"]').text();
const look = $('#showmedia_about_media').text();
const episodeTitle = $('#showmedia_about_name').text().replace(/[“”]/g, ''); const episodeTitle = $('#showmedia_about_name').text().replace(/[“”]/g, '');
const data = regexp.exec(look); const data = regexp.exec(look);
if (config.debug) if (config.debug) {
{
log.dumpToDebug('episode page', $.html()); log.dumpToDebug('episode page', $.html());
} }
if (!swf || !data) if (!swf || !data) {
{
log.warn('Somethig unexpected in the page at ' + address + ' (data are: ' + look + ')'); log.warn('Somethig unexpected in the page at ' + address + ' (data are: ' + look + ')');
log.warn('Setting Season to 0 and episode to 0...'); log.warn('Setting Season to 0 and episode to 0...');
if (config.debug) if (config.debug) {
{
log.dumpToDebug('episode unexpected', look); log.dumpToDebug('episode unexpected', look);
} }
@ -308,10 +431,9 @@ function scrapePage(config: IConfig, address: string, done: (err: Error, page?:
swf: swf[1], swf: swf[1],
volume: '0', volume: '0',
filename: '', filename: '',
media: null,
}); });
} } else {
else
{
done(null, { done(null, {
episode: data[3], episode: data[3],
id: epId, id: epId,
@ -321,8 +443,10 @@ function scrapePage(config: IConfig, address: string, done: (err: Error, page?:
swf: swf[1], swf: swf[1],
volume: data[2] || '1', volume: data[2] || '1',
filename: '', filename: '',
media: null,
}); });
} }
}
}); });
} }

View File

@ -7,4 +7,5 @@ interface IEpisodePage {
title: string; title: string;
swf: string; swf: string;
filename: string; filename: string;
media: IVlosScript;
} }

14
src/interface/IVlosScript.d.ts vendored Normal file
View File

@ -0,0 +1,14 @@
/**
 * Media description captured from the player bootstrap script: the script's
 * `config.media` object, with `series` attached to it afterwards.
 */
interface IVlosScript
{
  /* Episode level information, copied as-is into the episode page description. */
  metadata: {
    episode_number: any;
    id: any;
    title: any;
  };
  /* NOTE(review): possibly a typo for `config` - name kept so existing references still compile. */
  confic: any;
  /* Downloadable subtitle tracks; an empty list means the episode has none. */
  subtitles: Array<{
    /* Subtitle file format, e.g. 'ass'. */
    format: string;
    /* Language code, e.g. 'enUS'. */
    language: string;
    /* Address the subtitle file is fetched from. */
    url: string;
  }>;
  /* Available video streams. */
  streams: Array<{
    /* Stream kind, e.g. 'vo_adaptive_hls'. */
    format: string;
    /* Audio language code, e.g. 'jaJP'. */
    audio_lang: string;
    /* Language of burnt-in subtitles, or null when the stream has none. */
    hardsub_lang: string | null;
    /* Address of the stream. */
    url: string;
  }>;
  /* Series information attached after capture; only `title` is read. */
  series: {
    title: any;
  };
}

View File

@ -9,21 +9,14 @@ import subtitle from '../subtitle/index';
/** /**
* Merges the subtitle and video files into a Matroska Multimedia Container. * Merges the subtitle and video files into a Matroska Multimedia Container.
*/ */
export default function(config: IConfig, isSubtitled: boolean, rtmpInputPath: string, filePath: string, export default function(config: IConfig, isSubtitled: boolean, videoFileExtention: string, filePath: string,
streamMode: string, verbose: boolean, done: (err: Error) => void) verbose: boolean, done: (err: Error) => void)
{ {
const subtitlePath = filePath + '.' + (subtitle.formats[config.format] ? config.format : 'ass'); const subtitlePath = filePath + '.' + (subtitle.formats[config.format] ? config.format : 'ass');
let videoPath = filePath; let videoPath = filePath;
let cp; let cp;
if (streamMode === 'RTMP') videoPath += videoFileExtention;
{
videoPath += path.extname(rtmpInputPath);
}
else
{
videoPath += '.mp4';
}
cp = childProcess.exec(command() + ' ' + cp = childProcess.exec(command() + ' ' +
'-o "' + filePath + '.mkv" ' + '-o "' + filePath + '.mkv" ' +

70
src/vlos.ts Normal file
View File

@ -0,0 +1,70 @@
'use strict';
/* The module's default export is an object exposing getMedia. */
export default {getMedia};
/**
 * Builds the episode description from the inline script that configures the
 * page's Vilos player.
 *
 * The script is evaluated against a minimal set of stubs. The stand-in
 * `VilosPlayer.load()` captures `config.media` and attaches
 * `config.analytics.media_reporting_parent` to it as `series`.
 *
 * @param vlosScript Source text of the player bootstrap script.
 * @param seasonTitle Season name, stored as `season` in the result.
 * @param seasonNumber Season number, stored as `volume` in the result.
 * @returns The episode page description, with the captured media in `media`.
 * @throws Error when the script does not hand over a usable media description.
 *         Anything thrown by the evaluated script itself is propagated as-is.
 */
function getMedia(vlosScript: string, seasonTitle: string, seasonNumber: string): IEpisodePage
{
  /*
    Runs the script inside its own function scope so that the stubs below are
    the only page objects it can see. Returns what load() captured, or
    undefined when load() was never called with a media description.
  */
  function runPlayerScript(script: string): IVlosScript | undefined
  {
    let captured: IVlosScript | undefined;

    /* Stand-in for the page's player; the script is expected to fill `config` and then call load(). */
    function VilosPlayer(this: any)
    {
      /* Arguments passed to load() are ignored; only `config` matters here. */
      this.load = function(this: any)
      {
        const media = this.config.media;

        if (!media)
        {
          /* Nothing to capture; the caller reports the failure. */
          return;
        }

        const analytics = this.config.analytics;

        media.series = analytics ? analytics.media_reporting_parent : undefined;
        captured = media;
      };

      /* Pre-created so the script can assign into these without failing. */
      this.config = {};
      this.config.player = {};
      this.config.player.pause_screen = {};
      this.config.language = '';
    }

    /*
      Stubs for what the script needs. They are referenced only by the
      evaluated code, so they must keep these exact names.
    */
    const window = {
      WM: {
        UserConsent: {
          getUserConsentAdvertisingState(): string { return ''; }
        }
      }
    };

    const document = {
      getElementsByClassName(a: any): any { return {length: 0}; },
    };

    const localStorage = {
      getItem(a: any): any { return null; },
    };

    const $ = {
      cookie(a: any) { /* nothing */ },
    };

    /*
      SECURITY: this executes script text taken from a remote page with the
      full privileges of this process. The stubs above only shadow a few
      names; they are NOT a sandbox. Need to find a better way of doing.
    */
    // tslint:disable-next-line:no-eval
    eval(script);

    return captured;
  }

  const vlosMedia = runPlayerScript(vlosScript);

  if (vlosMedia === undefined)
  {
    /* Thrown instead of exiting the process so that the caller decides how to abort. */
    throw new Error('Error fetching vlos data - aborting - Please report the error if happen again.');
  }

  if (!vlosMedia.metadata || !vlosMedia.series)
  {
    throw new Error('Incomplete vlos data (missing episode metadata or series information).');
  }

  return {
    episode: vlosMedia.metadata.episode_number,
    id: vlosMedia.metadata.id,
    series: vlosMedia.series.title,
    season: seasonTitle,
    title: vlosMedia.metadata.title,
    swf: '',
    volume: seasonNumber,
    filename: '',
    media: vlosMedia,
  };
}