From 876def43928241ca33b064732ae32a316cef2abb Mon Sep 17 00:00:00 2001 From: Godzil Date: Sat, 11 Aug 2018 20:42:12 +0100 Subject: [PATCH] Add code to check what language CR is serving the page, and try to adapt some regexp to that. The language can be forced by the user Fix #1 and Fix #76 --- src/batch.ts | 1 + src/interface/IConfig.d.ts | 1 + src/languages.ts | 47 ++++++++++++++++++++++++++++++++++++++ src/my_request.ts | 17 ++++++++++++++ src/series.ts | 5 ++-- 5 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 src/languages.ts diff --git a/src/batch.ts b/src/batch.ts index d88d632..fb435f8 100644 --- a/src/batch.ts +++ b/src/batch.ts @@ -349,6 +349,7 @@ function parse(args: string[]): IConfigLine // Episode filter .option('-e, --episodes ', 'Episode list. Read documentation on how to use') // Settings + .option('-l, --crlang ', 'CR page language (valid: en, fr, es, it, pt, de, ru).') .option('-f, --format ', 'The subtitle format.', 'ass') .option('-o, --output ', 'The output path.') .option('-s, --series ', 'The series name override.') diff --git a/src/interface/IConfig.d.ts b/src/interface/IConfig.d.ts index 957c840..7952070 100644 --- a/src/interface/IConfig.d.ts +++ b/src/interface/IConfig.d.ts @@ -7,6 +7,7 @@ interface IConfig { merge?: boolean; episodes?: string; // Settings + crlang?: string; format?: string; output?: string; series?: string; diff --git a/src/languages.ts b/src/languages.ts new file mode 100644 index 0000000..9c89b43 --- /dev/null +++ b/src/languages.ts @@ -0,0 +1,47 @@ +'use strict'; + +const localeCC: { [id: string]: string; } = +{ + enUS: 'en', enGB: 'en', + esLA: 'es', esES: 'es', + ptPT: 'pt', ptBR: 'pt', + frFR: 'fr', + deDE: 'de', + itIT: 'it', + ruRU: 'ru', +}; + +export function localeToCC(locale: string): string +{ + let ret = localeCC.enGB; + + if (locale in localeCC) + { + ret = localeCC[locale]; + } + + return ret; +} + +const episodes_regexp: { [id: string]: RegExp; } = +{ + en: /Episode\s+((OVA)|(PV 
)?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + fr: /Épisode\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + de: /Folge\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + es: /Episodio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + it: /Episodio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + pt: /Episódio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, + ru: /Серия\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i, +}; + +export function get_epregexp(config: IConfig): RegExp +{ + let ret = episodes_regexp.en; + + if (config.crlang in episodes_regexp) + { + ret = episodes_regexp[config.crlang]; + } + + return ret; +} \ No newline at end of file diff --git a/src/my_request.ts b/src/my_request.ts index 5acb4dd..331a545 100644 --- a/src/my_request.ts +++ b/src/my_request.ts @@ -6,6 +6,7 @@ import Promise = require('bluebird'); import uuid = require('uuid'); import path = require('path'); import fs = require('fs-extra'); +import languages = require('./languages'); import log = require('./log'); import { RequestPromise } from 'request-promise'; @@ -111,6 +112,22 @@ function checkIfUserIsAuth(config: IConfig, done: (err: Error) => void): void const $ = cheerio.load(body); + /* As we are here, try to detect which locale CR tell us */ + const localeRE = /LOCALE = "([a-zA-Z]+)",/g; + const locale = localeRE.exec($('script').text())[1]; + const countryCode = languages.localeToCC(locale); + + if (config.crlang === undefined) + { + log.info('No locale set. 
Setting to the one reported by CR: "' + countryCode + '"'); + config.crlang = countryCode; + } + else if (config.crlang !== countryCode) + { + log.warn('Crunchy is configured for locale "' + config.crlang + '" but CR report "' + countryCode + '" (LOCALE = ' + locale + ')'); + log.warn('Check if it is correct or rerun (once) with "-l ' + countryCode + '" to correct.'); + } + /* Check if auth worked */ const regexps = /ga\('set', 'dimension[5-8]', '([^']*)'\);/g; const dims = regexps.exec($('script').text()); diff --git a/src/series.ts b/src/series.ts index 3d28129..693351f 100644 --- a/src/series.ts +++ b/src/series.ts @@ -1,12 +1,13 @@ 'use strict'; import cheerio = require('cheerio'); import episode from './episode'; -// import fs = require('fs'); import fs = require('fs-extra'); import my_request = require('./my_request'); import path = require('path'); import url = require('url'); import log = require('./log'); +import languages = require('./languages'); + const persistent = '.crpersistent'; /** @@ -234,7 +235,7 @@ function pageScrape(config: IConfig, task: IConfigTask, done: (err: any, result? const season_name = $(el).closest('ul').prev('a').text(); const volume = /([0-9]+)\s*$/.exec($(el).closest('ul').prev('a').text()); - const regexp = /Episode\s+((PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i; + const regexp = languages.get_epregexp(config); const episode = regexp.exec($(el).children('.series-title').text()); const url = $(el).attr('href');