diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1a29a93ed..94434d5e7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2034,6 +2034,7 @@ TeleQuebecEmissionIE, TeleQuebecIE, TeleQuebecLiveIE, + TeleQuebecSeasonIE, TeleQuebecSquatIE, TeleQuebecVideoIE, ) diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py index 7f5d5d29b..be927d6c5 100644 --- a/yt_dlp/extractor/telequebec.py +++ b/yt_dlp/extractor/telequebec.py @@ -1,7 +1,12 @@ +import json +import re + from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, smuggle_url, + traverse_obj, try_get, unified_timestamp, ) @@ -28,81 +33,174 @@ class TeleQuebecIE(TeleQuebecBaseIE): )/(?P\d+) ''' _TESTS = [{ - # available till 01.01.2023 - 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', - 'info_dict': { - 'id': '6155972771001', - 'ext': 'mp4', - 'title': 'Un petit choc et puis repart!', - 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374', - 'timestamp': 1589262469, - 'uploader_id': '6150020952001', - 'upload_date': '20200512', - }, - 'add_ie': ['BrightcoveNew'], + 'url': 'https://zonevideo.telequebec.tv/media/1/exemple', + 'only_matching': True, }, { - 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', + 'url': 'https://coucou.telequebec.tv/videos/1004958/top-cornichon/l-anniversaire-de-top-cornichon', 'info_dict': { - 'id': '6167180337001', + 'id': '6370144678112', 'ext': 'mp4', - 'title': 'Le soleil', - 'description': 'md5:64289c922a8de2abbe99c354daffde02', + 'title': 'L\'anniversaire de Top Cornichon', + 'description': 'md5:fc4fb2967dcea0baa8b6d39a11da917b', 'uploader_id': '6150020952001', - 'upload_date': '20200625', - 'timestamp': 1593090307, + 'duration': 360.107, + 'thumbnail': 'md5:027b0d8b371bc86d5ac9c024acfeb6f2', + 'timestamp': 1742217124, + 'upload_date': '20250317', + 'series': 'Top Cornichon', + 'episode': 'L\'anniversaire de Top Cornichon', + }, + 'params': { + 'skip_download': True, }, 'add_ie': ['BrightcoveNew'], - }, { - # no description - 'url': 'http://zonevideo.telequebec.tv/media/30261', - 'only_matching': True, - }, { - 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain', - 'only_matching': True, }] def _real_extract(self, url): media_id = self._match_id(url) - media = self._download_json( + meta = self._download_json( 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id, - media_id)['media'] - source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove') - info = self._brightcove_result(source_id, '22gPKdt7f') - product = media.get('product') or {} - season = product.get('season') or {} + media_id, fatal=False) + media = meta.get('media') if meta else None + stream_infos = try_get(media, lambda m: m['streamInfos']) or [] + + if media and any(si and si.get('source') == 'Brightcove' for si in stream_infos): + source_id = next( + si['sourceId'] for si in stream_infos + if si and si.get('source') == 'Brightcove') + info = self._brightcove_result(source_id, '22gPKdt7f') + product = media.get('product') or {} + season = product.get('season') or {} + info.update({ + 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], str), + 'series': try_get(season, lambda x: x['serie']['titre']), + 'season': season.get('name'), + 'season_number': int_or_none(season.get('seasonNo')), + 'episode': product.get('titre'), + 'episode_number': int_or_none(product.get('episodeNo')), + }) + return info + + # Coucou Next.js: Brightcove id is in __NEXT_DATA__ when mnmedias has no catalogue row. + webpage = self._download_webpage(url, media_id) + next_json = self._search_regex( + r'', + webpage, '__NEXT_DATA__') + next_data = self._parse_json(next_json, media_id) + media_obj = traverse_obj(next_data, ('props', 'pageProps', 'media')) or {} + bc_video_id = media_obj.get('mediaId') + if not bc_video_id: + raise ExtractorError('Unable to extract Brightcove video id') + + info = self._brightcove_result(str(bc_video_id), '22gPKdt7f') + hero = traverse_obj(next_data, ('props', 'pageProps', 'hero')) or {} info.update({ - 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], str), - 'series': try_get(season, lambda x: x['serie']['titre']), - 'season': season.get('name'), - 'season_number': int_or_none(season.get('seasonNo')), - 'episode': product.get('titre'), - 'episode_number': int_or_none(product.get('episodeNo')), + 'description': media_obj.get('description'), + 'episode': media_obj.get('titre'), + 'series': hero.get('nom'), }) return info -class TeleQuebecSquatIE(InfoExtractor): - _VALID_URL = r'https?://squat\.telequebec\.tv/videos/(?P\d+)' +class TeleQuebecSeasonIE(InfoExtractor): + """telequebec.tv/contenu/{slug}/saison/{n} — expands to episode /regarder/ URLs via GraphQL.""" + + _VALID_URL = r'https?://(?:www\.)?telequebec\.tv/contenu/(?P[^/?#]+)/saison/(?P\d+)' _TESTS = [{ - 'url': 'https://squat.telequebec.tv/videos/9314', + 'url': 'https://telequebec.tv/contenu/macaroni-tout-garni/saison/1', + 'playlist_mincount': 15, 'info_dict': { - 'id': 'd59ae78112d542e793d83cc9d3a5b530', - 'ext': 'mp4', - 'title': 'Poupeflekta', - 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75', - 'duration': 1351, - 'timestamp': 1569057600, - 'upload_date': '20190921', - 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir', - 'season': 'Saison 3', - 'season_number': 3, - 'episode_number': 57, + 'id': 'macaroni-tout-garni-s1', + 'title': 'Macaroni tout garni', }, 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.telequebec.tv/contenu/macaroni-tout-garni/saison/1/', + 'only_matching': True, }] + def _real_extract(self, url): + mobj = self._match_valid_url(url) + slug, season = mobj.group('slug'), int(mobj.group('season')) + playlist_id = f'{slug}-s{season}' + + query = '''query ($slug: String!, $season: Int!) { + productPage(rootProductSlug: $slug, seasonNumber: $season) { + ... on ArtisanPage { + blocks { + ... on ArtisanBlocksProductPlayableProductsStrip { + blockConfiguration { + rootProduct { title } + season { title } + products { title videoCanonicalUrl } + } + } + } + } + } + }''' + + resp = self._download_json( + 'https://api.pc-cms.tele.quebec/graphql', playlist_id, + data=json.dumps({ + 'query': query, + 'variables': {'slug': slug, 'season': season}, + }, separators=(',', ':')).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'Origin': 'https://telequebec.tv', + }) + + errs = traverse_obj(resp, ('errors', ..., 'message', {str})) + if errs: + raise ExtractorError(', '.join(errs), expected=True) + + playlist_title = None + products = [] + for block in traverse_obj(resp, ('data', 'productPage', 'blocks')) or []: + cfg = traverse_obj(block, 'blockConfiguration') + if not cfg: + continue + prods = cfg.get('products') + if not prods: + continue + products = prods + playlist_title = traverse_obj(cfg, ('season', 'title')) or traverse_obj(cfg, ('rootProduct', 'title')) + break + + if not products: + raise ExtractorError('No playable episodes in this season', expected=True) + + def _episode_sort_key(p): + vu = p.get('videoCanonicalUrl') or '' + m = re.search(r'/(\d+)/?(?:[?#]|$)', vu) + return int(m.group(1)) if m else 0 + + products = sorted(products, key=_episode_sort_key) + + entries = [ + self.url_result( + p['videoCanonicalUrl'], + ie=TeleQuebecEmissionIE.ie_key(), + video_title=p.get('title')) + for p in products + if p.get('videoCanonicalUrl') + ] + + return self.playlist_result( + entries, + playlist_id=playlist_id, + playlist_title=playlist_title or slug) + + +class TeleQuebecSquatIE(InfoExtractor): + _WORKING = False + _VALID_URL = r'https?://squat\.telequebec\.tv/videos/(?P\d+)' + _TESTS = [] + def _real_extract(self, url): video_id = self._match_id(url) @@ -136,25 +234,47 @@ class TeleQuebecEmissionIE(InfoExtractor): ) (?P[^?#&]+) ''' + + @classmethod + def suitable(cls, url): + if re.match(r'https?://(?:www\.)?telequebec\.tv/contenu/[^/?#]+/saison/\d+', url): + return False + return super().suitable(url) + _TESTS = [{ - 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente', + 'url': 'https://telequebec.tv/regarder/donner-lgout/2/6', 'info_dict': { - 'id': '6154476028001', + 'id': 'ref:100832979', 'ext': 'mp4', - 'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?', - 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f', - 'upload_date': '20200505', - 'timestamp': 1588713424, + 'title': 'La Grèce à Québec', + 'description': 'md5:c506e07b90426ad391e18a753c021516', 'uploader_id': '6150020952001', + 'duration': 2695.083, + 'thumbnail': 'md5:17aead23a395fb3f56a376524eb9f23c', + 'timestamp': 1777782475, + 'upload_date': '20260503', }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + }, { + 'url': 'https://telequebec.tv/regarder/donner-lgout/2/1', + 'only_matching': True, + }, { + 'url': 'https://telequebec.tv/regarder/kamikazes/2/7', + 'only_matching': True, }, { - 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression', + 'url': 'https://telequebec.tv/regarder/5050', 'only_matching': True, }, { - 'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078', + 'url': 'https://telequebec.tv/regarder/les-magnetiques', 'only_matching': True, }, { - 'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/', + 'url': 'https://telequebec.tv/regarder/les-dalton/2/107', + 'only_matching': True, + }, { + 'url': 'https://telequebec.tv/regarder/macaroni-tout-garni/1/1', 'only_matching': True, }] @@ -164,31 +284,33 @@ def _real_extract(self, url): webpage = self._download_webpage(url, display_id) media_id = self._search_regex( - r'mediaId\s*:\s*(?P\d+)', webpage, 'media id') + ( + r'mediaId\s*:\s*(?P\d+)', + r'"mediaId"\s*,\s*"(?P\d+)"', + r'"mediaId"\s*,\s*(?P\d+)', + r'mediaId\\"\s*,\s*\\"(?P\d+)', + ), + webpage, 'media id') + + meta = self._download_json( + 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id, + media_id, fatal=False) + media = meta.get('media') if meta else None + stream_infos = try_get(media, lambda m: m['streamInfos']) or [] + if media and any(si and si.get('source') == 'Brightcove' for si in stream_infos): + return self.url_result( + 'http://zonevideo.telequebec.tv/media/' + media_id, + TeleQuebecIE.ie_key()) - return self.url_result( - 'http://zonevideo.telequebec.tv/media/' + media_id, - TeleQuebecIE.ie_key()) + # New telequebec.tv stack no longer mirrors catalogue ids into mnmedias; Brightcove loads + # videoId=ref:{mediaId} (see web player bundle: videoId:`ref:${mediaId}`). + return TeleQuebecBaseIE._brightcove_result(f'ref:{media_id}', 'ja7RtbSne') class TeleQuebecLiveIE(TeleQuebecBaseIE): + _WORKING = False _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?Pendirect)' - _TEST = { - 'url': 'http://zonevideo.telequebec.tv/endirect/', - 'info_dict': { - 'id': '6159095684001', - 'ext': 'mp4', - 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'is_live': True, - 'description': 'Canal principal de Télé-Québec', - 'uploader_id': '6150020952001', - 'timestamp': 1590439901, - 'upload_date': '20200525', - }, - 'params': { - 'skip_download': True, - }, - } + _TESTS = [] def _real_extract(self, url): return self._brightcove_result('6159095684001', 'skCsmi2Uw') @@ -198,15 +320,7 @@ class TeleQuebecVideoIE(TeleQuebecBaseIE): _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P\d+)' _TESTS = [{ 'url': 'https://video.telequebec.tv/player/31110/stream', - 'info_dict': { - 'id': '6202570652001', - 'ext': 'mp4', - 'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée', - 'description': 'md5:685a7e4c450ba777c60adb6e71e41526', - 'upload_date': '20201019', - 'timestamp': 1603115930, - 'uploader_id': '6101674910001', - }, + 'only_matching': True, }, { 'url': 'https://video.telequebec.tv/player-live/28527', 'only_matching': True,