Add code to check which language CR is serving the page in, and try to adapt

some regexps to that. The language can be forced by the user.

Fix #1 and Fix #76
This commit is contained in:
Godzil 2018-08-11 20:42:12 +01:00
parent 0ba51b7270
commit 876def4392
5 changed files with 69 additions and 2 deletions

View File

@ -349,6 +349,7 @@ function parse(args: string[]): IConfigLine
// Episode filter // Episode filter
.option('-e, --episodes <s>', 'Episode list. Read documentation on how to use') .option('-e, --episodes <s>', 'Episode list. Read documentation on how to use')
// Settings // Settings
.option('-l, --crlang <s>', 'CR page language (valid: en, fr, es, it, pt, de, ru).')
.option('-f, --format <s>', 'The subtitle format.', 'ass') .option('-f, --format <s>', 'The subtitle format.', 'ass')
.option('-o, --output <s>', 'The output path.') .option('-o, --output <s>', 'The output path.')
.option('-s, --series <s>', 'The series name override.') .option('-s, --series <s>', 'The series name override.')

View File

@ -7,6 +7,7 @@ interface IConfig {
merge?: boolean; merge?: boolean;
episodes?: string; episodes?: string;
// Settings // Settings
crlang?: string;
format?: string; format?: string;
output?: string; output?: string;
series?: string; series?: string;

47
src/languages.ts Normal file
View File

@ -0,0 +1,47 @@
'use strict';
/**
 * Lookup table from a locale identifier (e.g. `frFR`) to its short
 * language code (e.g. `fr`).
 */
const localeCC: { [id: string]: string; } =
{
  enUS: 'en', enGB: 'en',
  esLA: 'es', esES: 'es',
  ptPT: 'pt', ptBR: 'pt',
  frFR: 'fr',
  deDE: 'de',
  itIT: 'it',
  ruRU: 'ru',
};

/**
 * Converts a locale identifier into its short language code.
 *
 * @param locale - Locale identifier such as `enUS` or `ptBR`.
 * @returns The language code listed in the table, or `en` when the locale
 *          is not one of the table's own keys.
 */
export function localeToCC(locale: string): string
{
  // Own-property check on purpose: the `in` operator also matches keys
  // inherited from Object.prototype (`toString`, `constructor`, ...), which
  // would hand back a function instead of a language code.
  if (Object.prototype.hasOwnProperty.call(localeCC, locale))
  {
    return localeCC[locale];
  }

  return localeCC.enGB;
}
/**
 * Per-language patterns matching the trailing episode marker of a title.
 * Capture group 1 is the whole episode identifier, group 2 is `OVA` when
 * present, and group 3 is the optional `PV ` prefix.
 */
const episodes_regexp: { [id: string]: RegExp; } =
{
  en: /Episode\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  fr: /Épisode\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  de: /Folge\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  es: /Episodio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  it: /Episodio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  pt: /Episódio\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
  ru: /Серия\s+((OVA)|(PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i,
};

/**
 * Selects the episode-matching pattern for the configured page language.
 *
 * @param config - Configuration whose `crlang` field holds the language code.
 * @returns The pattern registered for `config.crlang`, or the English
 *          pattern when `crlang` is unset or not a known language.
 */
export function get_epregexp(config: IConfig): RegExp
{
  const lang = config.crlang;

  // Own-property check on purpose: the `in` operator also matches keys
  // inherited from Object.prototype (`toString`, `constructor`, ...), which
  // would return something that is not a RegExp and break the caller's exec().
  if (lang !== undefined && Object.prototype.hasOwnProperty.call(episodes_regexp, lang))
  {
    return episodes_regexp[lang];
  }

  return episodes_regexp.en;
}

View File

@ -6,6 +6,7 @@ import Promise = require('bluebird');
import uuid = require('uuid'); import uuid = require('uuid');
import path = require('path'); import path = require('path');
import fs = require('fs-extra'); import fs = require('fs-extra');
import languages = require('./languages');
import log = require('./log'); import log = require('./log');
import { RequestPromise } from 'request-promise'; import { RequestPromise } from 'request-promise';
@ -111,6 +112,22 @@ function checkIfUserIsAuth(config: IConfig, done: (err: Error) => void): void
const $ = cheerio.load(body); const $ = cheerio.load(body);
/* As we are here, try to detect which locale CR tell us */
const localeRE = /LOCALE = "([a-zA-Z]+)",/g;
const locale = localeRE.exec($('script').text())[1];
const countryCode = languages.localeToCC(locale);
if (config.crlang === undefined)
{
log.info('No locale set. Setting to the one reported by CR: "' + countryCode + '"');
config.crlang = countryCode;
}
else if (config.crlang !== countryCode)
{
log.warn('Crunchy is configured for locale "' + config.crlang + '" but CR report "' + countryCode + '" (LOCALE = ' + locale + ')');
log.warn('Check if it is correct or rerun (once) with "-l ' + countryCode + '" to correct.');
}
/* Check if auth worked */ /* Check if auth worked */
const regexps = /ga\('set', 'dimension[5-8]', '([^']*)'\);/g; const regexps = /ga\('set', 'dimension[5-8]', '([^']*)'\);/g;
const dims = regexps.exec($('script').text()); const dims = regexps.exec($('script').text());

View File

@ -1,12 +1,13 @@
'use strict'; 'use strict';
import cheerio = require('cheerio'); import cheerio = require('cheerio');
import episode from './episode'; import episode from './episode';
// import fs = require('fs');
import fs = require('fs-extra'); import fs = require('fs-extra');
import my_request = require('./my_request'); import my_request = require('./my_request');
import path = require('path'); import path = require('path');
import url = require('url'); import url = require('url');
import log = require('./log'); import log = require('./log');
import languages = require('./languages');
const persistent = '.crpersistent'; const persistent = '.crpersistent';
/** /**
@ -234,7 +235,7 @@ function pageScrape(config: IConfig, task: IConfigTask, done: (err: any, result?
const season_name = $(el).closest('ul').prev('a').text(); const season_name = $(el).closest('ul').prev('a').text();
const volume = /([0-9]+)\s*$/.exec($(el).closest('ul').prev('a').text()); const volume = /([0-9]+)\s*$/.exec($(el).closest('ul').prev('a').text());
const regexp = /Episode\s+((PV )?[S0-9][\-P0-9.]*[a-fA-F]?)\s*$/i; const regexp = languages.get_epregexp(config);
const episode = regexp.exec($(el).children('.series-title').text()); const episode = regexp.exec($(el).children('.series-title').text());
const url = $(el).attr('href'); const url = $(el).attr('href');