Merge pull request #47 from JorrinKievit/pr-14-v2

Added VidSrc and StreamBucket Part 2
This commit is contained in:
mrjvs 2024-01-05 19:14:56 +01:00 committed by GitHub
commit 5199213a70
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 331 additions and 2 deletions

View File

@ -18,6 +18,8 @@ module.exports = {
},
plugins: ['@typescript-eslint', 'import', 'prettier'],
rules: {
'no-plusplus': 'off',
'no-bitwise': 'off',
'no-underscore-dangle': 'off',
'@typescript-eslint/no-explicit-any': 'off',
'no-console': 'off',

View File

@ -41,6 +41,7 @@ async function runBrowserScraping(
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
const page = await browser.newPage();
page.on('console', (message) => console.log(`${message.type().slice(0, 3).toUpperCase()} ${message.text()}`));
await page.goto(server.resolvedUrls.local[0]);
await page.waitForFunction('!!window.scrape', { timeout: 5000 });

View File

@ -4,7 +4,7 @@ export type FetcherOptions = {
baseUrl?: string;
headers?: Record<string, string>;
query?: Record<string, string>;
method?: 'GET' | 'POST';
method?: 'HEAD' | 'GET' | 'POST';
readHeaders?: string[];
body?: Record<string, any> | string | FormData | URLSearchParams;
};
@ -17,7 +17,7 @@ export type DefaultedFetcherOptions = {
headers: Record<string, string>;
query: Record<string, string>;
readHeaders: string[];
method: 'GET' | 'POST';
method: 'HEAD' | 'GET' | 'POST';
};
export type FetcherResponse<T = any> = {

View File

@ -3,15 +3,18 @@ import { febboxHlsScraper } from '@/providers/embeds/febbox/hls';
import { febboxMp4Scraper } from '@/providers/embeds/febbox/mp4';
import { mixdropScraper } from '@/providers/embeds/mixdrop';
import { mp4uploadScraper } from '@/providers/embeds/mp4upload';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { streamsbScraper } from '@/providers/embeds/streamsb';
import { upcloudScraper } from '@/providers/embeds/upcloud';
import { upstreamScraper } from '@/providers/embeds/upstream';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { flixhqScraper } from '@/providers/sources/flixhq/index';
import { goMoviesScraper } from '@/providers/sources/gomovies/index';
import { kissAsianScraper } from '@/providers/sources/kissasian/index';
import { lookmovieScraper } from '@/providers/sources/lookmovie';
import { remotestreamScraper } from '@/providers/sources/remotestream';
import { showboxScraper } from '@/providers/sources/showbox/index';
import { vidsrcScraper } from '@/providers/sources/vidsrc/index';
import { zoechipScraper } from '@/providers/sources/zoechip';
import { smashyStreamDScraper } from './embeds/smashystream/dued';
@ -27,6 +30,7 @@ export function gatherAllSources(): Array<Sourcerer> {
showboxScraper,
goMoviesScraper,
zoechipScraper,
vidsrcScraper,
lookmovieScraper,
smashyStreamScraper,
];
@ -42,6 +46,8 @@ export function gatherAllEmbeds(): Array<Embed> {
febboxMp4Scraper,
febboxHlsScraper,
mixdropScraper,
vidsrcembedScraper,
streambucketScraper,
smashyStreamFScraper,
smashyStreamDScraper,
];

View File

@ -0,0 +1,101 @@
import { flags } from '@/entrypoint/utils/targets';
import { makeEmbed } from '@/providers/base';
// StreamBucket makes use of https://github.com/nicxlau/hunter-php-javascript-obfuscator
const hunterRegex = /eval\(function\(h,u,n,t,e,r\).*?\("(.*?)",\d*?,"(.*?)",(\d*?),(\d*?),\d*?\)\)/;
const linkRegex = /file:"(.*?)"/;
/**
 * Compact decoder for the "h,u,n,t,e,r" obfuscation scheme (the same
 * encoding has also been seen on some sites under the name "p,l,a,y,e,r").
 * The scheme is plain chunked + masked encoding:
 *
 * - `mask[delimiterOffset]` is the character that separates chunks.
 * - Every chunk spells one number, most significant digit first. A digit's
 *   value is the position of its character inside `mask`, and the numeric
 *   base is `delimiterOffset`.
 * - Subtracting `charCodeOffset` from that number yields the character code
 *   of one output character.
 *
 * @param encoded - The obfuscated payload.
 * @param mask - Alphabet used to map characters to digit values.
 * @param charCodeOffset - Amount subtracted from every decoded number.
 * @param delimiterOffset - Index of the delimiter in `mask`; also the base.
 * @returns The decoded string, one character per non-empty chunk.
 */
function decodeHunter(encoded: string, mask: string, charCodeOffset: number, delimiterOffset: number) {
  const separator = mask[delimiterOffset];
  let output = '';

  for (const piece of encoded.split(separator)) {
    // Consecutive or trailing separators produce empty pieces; ignore them.
    if (!piece) continue;

    // Walk the digits from least to most significant so the exponent
    // simply grows as we move left through the chunk.
    let code = 0;
    for (let pos = piece.length - 1; pos >= 0; pos--) {
      const digit = mask.indexOf(piece.charAt(pos));
      code += digit * delimiterOffset ** (piece.length - 1 - pos);
    }

    // The stored number is shifted by a fixed amount; undo the shift.
    output += String.fromCharCode(code - charCodeOffset);
  }

  return output;
}
/**
 * Embed scraper for StreamBucket.
 *
 * Downloads the embed page, locates the "hunter"-obfuscated script in it,
 * decodes that script with `decodeHunter` and extracts the HLS playlist URL
 * from the decoded JavaScript.
 *
 * Throws an `Error` when the page shows a captcha, when the obfuscated
 * script or either numeric parameter cannot be found, or when the decoded
 * script contains no `file:"..."` entry.
 */
export const streambucketScraper = makeEmbed({
  id: 'streambucket',
  name: 'StreamBucket',
  rank: 196,
  // TODO - Disabled until ctx.fetcher and ctx.proxiedFetcher don't trigger bot detection
  disabled: true,
  async scrape(ctx) {
    // Using the context fetchers makes the site return just the string "No bots please!".
    // TODO - Fix this. Native fetch does not trigger this. No idea why right now
    const response = await fetch(ctx.url);
    const html = await response.text();

    // This is different from the above mentioned bot detection
    if (html.includes('captcha-checkbox')) {
      // TODO - This doesn't use recaptcha, just really basic "image match". Maybe could automate?
      throw new Error('StreamBucket got captchaed');
    }

    const hunterMatch = html.match(hunterRegex);
    if (!hunterMatch) {
      throw new Error('Failed to find StreamBucket hunter JavaScript');
    }

    const encoded = hunterMatch[1];
    const mask = hunterMatch[2];

    // The numeric captures can only be digits or the empty string. `Number('')`
    // is 0, which would slip past a NaN check and decode garbage, so parse with
    // `parseInt`, which reports an empty capture as NaN.
    const charCodeOffset = Number.parseInt(hunterMatch[3], 10);
    const delimiterOffset = Number.parseInt(hunterMatch[4], 10);

    if (Number.isNaN(charCodeOffset)) {
      throw new Error('StreamBucket hunter JavaScript charCodeOffset is not a valid number');
    }

    if (Number.isNaN(delimiterOffset)) {
      throw new Error('StreamBucket hunter JavaScript delimiterOffset is not a valid number');
    }

    const decoded = decodeHunter(encoded, mask, charCodeOffset, delimiterOffset);

    const linkMatch = decoded.match(linkRegex);
    if (!linkMatch) {
      throw new Error('Failed to find StreamBucket HLS link');
    }

    return {
      stream: [
        {
          id: 'primary',
          type: 'hls',
          playlist: linkMatch[1],
          flags: [flags.CORS_ALLOWED],
          captions: [],
        },
      ],
    };
  },
});

View File

@ -0,0 +1,55 @@
import { flags } from '@/entrypoint/utils/targets';
import { makeEmbed } from '@/providers/base';
const hlsURLRegex = /file:"(.*?)"/;
const setPassRegex = /var pass_path = "(.*set_pass\.php.*)";/;
/**
 * Embed scraper for player pages hosted by VidSrc itself.
 *
 * Reads the `file:"..."` value from the player page, strips the junk
 * segments from it, base64-decodes the remainder into the HLS playlist URL,
 * and then pings the page's `set_pass.php` endpoint before returning the
 * stream.
 *
 * Throws an `Error` when the playlist value is missing, when the decoded
 * value is not an `.m3u8` URL, or when no `set_pass.php` link is present.
 */
export const vidsrcembedScraper = makeEmbed({
  id: 'vidsrcembed', // VidSrc is both a source and an embed host
  name: 'VidSrc',
  rank: 197,
  async scrape(ctx) {
    const pageHtml = await ctx.proxiedFetcher<string>(ctx.url, {
      headers: { referer: ctx.url },
    });

    // The captured value is base64 with extra "//...=" runs and a "#2"
    // marker mixed in; both have to be removed before decoding.
    const fileValue = pageHtml.match(hlsURLRegex)?.[1];
    const encodedPlaylist = fileValue?.replace(/(\/\/\S+?=)/g, '').replace('#2', '');
    if (!encodedPlaylist) throw new Error('Unable to find HLS playlist');

    const playlistUrl = atob(encodedPlaylist);
    if (!playlistUrl.includes('.m3u8')) throw new Error('Unable to find HLS playlist');

    const passPath = pageHtml.match(setPassRegex)?.[1];
    if (!passPath) throw new Error('Unable to find set_pass.php link');

    // Protocol-relative links need an explicit scheme.
    const passUrl = passPath.startsWith('//') ? `https:${passPath}` : passPath;

    // VidSrc uses a password endpoint to temporarily whitelist the user's IP.
    // The player calls it on an interval. Right now playback works without it,
    // but in the future the frontend may need a hook to call this endpoint.
    await ctx.proxiedFetcher(passUrl, {
      headers: { referer: ctx.url },
    });

    return {
      stream: [
        {
          id: 'primary',
          type: 'hls',
          playlist: playlistUrl,
          flags: [flags.CORS_ALLOWED],
          captions: [],
        },
      ],
    };
  },
});

View File

@ -0,0 +1,2 @@
// Origin of the main VidSrc site. The VidSrc source scraper uses it as the
// base URL for `/embed/...` page requests and as the `referer` header value.
export const vidsrcBase = 'https://vidsrc.me';
// Origin of the VidSrc "rcp" host. The VidSrc source scraper uses it as the
// base URL when requesting `/rcp/<hash>` pages.
export const vidsrcRCPBase = 'https://rcp.vidsrc.me';

View File

@ -0,0 +1,13 @@
import { flags } from '@/entrypoint/utils/targets';
import { makeSourcerer } from '@/providers/base';
import { scrapeMovie } from '@/providers/sources/vidsrc/scrape-movie';
import { scrapeShow } from '@/providers/sources/vidsrc/scrape-show';
// Source provider definition for VidSrc, built with `makeSourcerer`.
// Movie and show lookups are delegated to `scrapeMovie` and `scrapeShow`.
export const vidsrcScraper = makeSourcerer({
id: 'vidsrc',
name: 'VidSrc',
// NOTE(review): presumably a priority relative to other sources - confirm against makeSourcerer
rank: 120,
// NOTE(review): presumably declares that resulting streams can be fetched cross-origin - confirm
flags: [flags.CORS_ALLOWED],
scrapeMovie,
scrapeShow,
});

View File

@ -0,0 +1,8 @@
import { getVidSrcMovieSources } from '@/providers/sources/vidsrc/scrape';
import { MovieScrapeContext } from '@/utils/context';
/**
 * Movie entry point of the VidSrc source.
 *
 * @param ctx - Scrape context describing the requested movie.
 * @returns An object whose `embeds` list holds the embeds found for the movie.
 */
export async function scrapeMovie(ctx: MovieScrapeContext) {
  const embeds = await getVidSrcMovieSources(ctx);
  return { embeds };
}

View File

@ -0,0 +1,8 @@
import { getVidSrcShowSources } from '@/providers/sources/vidsrc/scrape';
import { ShowScrapeContext } from '@/utils/context';
/**
 * Show entry point of the VidSrc source.
 *
 * @param ctx - Scrape context describing the requested episode.
 * @returns An object whose `embeds` list holds the embeds found for the episode.
 */
export async function scrapeShow(ctx: ShowScrapeContext) {
  const embeds = await getVidSrcShowSources(ctx);
  return { embeds };
}

View File

@ -0,0 +1,133 @@
import { load } from 'cheerio';
import { SourcererEmbed } from '@/providers/base';
import { streambucketScraper } from '@/providers/embeds/streambucket';
import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
import { vidsrcBase, vidsrcRCPBase } from '@/providers/sources/vidsrc/common';
import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
/**
 * Decodes a hex-encoded, XOR-obfuscated string.
 *
 * `encoded` is read two hex digits at a time; each resulting byte is XORed
 * with the character code of the seed character at the same position, with
 * the seed repeating once it runs out.
 *
 * @param encoded - Hex string, two characters per output character.
 * @param seed - Repeating XOR key.
 * @returns The decoded string.
 */
function decodeSrc(encoded: string, seed: string) {
  const chars: string[] = [];
  for (let offset = 0; offset < encoded.length; offset += 2) {
    // `slice` replaces the deprecated `substr`; both yield the same two characters here.
    const byte = Number.parseInt(encoded.slice(offset, offset + 2), 16);
    const keyCode = seed.charCodeAt((offset / 2) % seed.length);
    chars.push(String.fromCharCode(byte ^ keyCode));
  }
  return chars.join('');
}
/**
 * Resolves every server listed on a VidSrc embed page to its real embed URL.
 *
 * VidSrc stores one hash per server in the page HTML. Requesting the hash
 * from the rcp host yields a page holding an obfuscated redirect URL, and
 * following that redirect leads to the actual embed. Each hash is resolved
 * in turn, which is slow but required.
 *
 * @param ctx - Movie or show scrape context supplying the proxied fetcher.
 * @param startingURL - Path or URL of the embed page, resolved against `vidsrcBase`.
 * @returns The embeds that map to a known embed scraper; hosts that are
 *   ignored on purpose are left out.
 * @throws Error when an rcp page lacks the encoded source or its seed, or
 *   when a redirect ends on a host that is not recognised.
 */
async function getVidSrcEmbeds(ctx: MovieScrapeContext | ShowScrapeContext, startingURL: string) {
  // Hosts that are skipped deliberately:
  // - 2embed only re-sources from other embeds that are scrapable as a 'source'
  // - player-cdn streams video over a custom WebSocket connection
  const ignoredHosts = ['2embed.cc', 'www.2embed.cc', 'player-cdn.com'];

  const found: SourcererEmbed[] = [];

  const landingHtml = await ctx.proxiedFetcher<string>(startingURL, {
    baseUrl: vidsrcBase,
  });
  const $landing = load(landingHtml);

  const hashes = $landing('.server[data-hash]')
    .toArray()
    .map((node) => $landing(node).attr('data-hash'))
    .filter((value) => value !== undefined);

  for (const hash of hashes) {
    const rcpHtml = await ctx.proxiedFetcher<string>(`/rcp/${hash}`, {
      baseUrl: vidsrcRCPBase,
      headers: {
        referer: vidsrcBase,
      },
    });
    const $rcp = load(rcpHtml);

    const encodedSrc = $rcp('#hidden').attr('data-h');
    const seed = $rcp('body').attr('data-i');
    if (!encodedSrc || !seed) {
      throw new Error('Failed to find encoded iframe src');
    }

    const decodedSrc = decodeSrc(encodedSrc, seed);
    const redirectTarget = decodedSrc.startsWith('//') ? `https:${decodedSrc}` : decodedSrc;

    // Only the final location matters, so a HEAD request is enough.
    const redirectResponse = await ctx.proxiedFetcher.full(redirectTarget, {
      method: 'HEAD',
      headers: {
        referer: vidsrcBase,
      },
    });
    const resolvedUrl = redirectResponse.finalUrl;

    const host = new URL(resolvedUrl).host;
    let embedId = '';
    if (host === 'vidsrc.stream') {
      embedId = vidsrcembedScraper.id;
    } else if (host === 'streambucket.net') {
      embedId = streambucketScraper.id;
    } else if (!ignoredHosts.includes(host)) {
      throw new Error(`Failed to find VidSrc embed source for ${resolvedUrl}`);
    }

    // Ignored hosts leave the id empty, so only keep real matches.
    if (embedId !== '') {
      found.push({ embedId, url: resolvedUrl });
    }
  }

  return found;
}
/**
 * Collects the VidSrc embeds for a movie.
 *
 * @param ctx - Movie scrape context; its TMDB id selects the embed page.
 * @returns The embeds resolved from the movie's embed page.
 */
export async function getVidSrcMovieSources(ctx: MovieScrapeContext) {
  const embedPath = `/embed/${ctx.media.tmdbId}`;
  return getVidSrcEmbeds(ctx, embedPath);
}
/**
 * Collects the VidSrc embeds for one episode of a show.
 *
 * The show's embed page always opens on season 1 episode 1, whatever URL is
 * requested, but it lists every episode of every season. The entry for the
 * wanted episode carries the real embed URL, so that is looked up first.
 *
 * @param ctx - Show scrape context holding the TMDB id, season and episode.
 * @returns The embeds resolved from the episode's embed page.
 * @throws Error when the episode is not listed or has no embed URL.
 */
export async function getVidSrcShowSources(ctx: ShowScrapeContext) {
  const listingHtml = await ctx.proxiedFetcher<string>(`/embed/${ctx.media.tmdbId}`, {
    baseUrl: vidsrcBase,
  });
  const $ = load(listingHtml);

  const episodeSelector = `.ep[data-s="${ctx.media.season.number}"][data-e="${ctx.media.episode.number}"]`;
  const episodeNode = $(episodeSelector).first();
  if (episodeNode.length === 0) {
    throw new Error('failed to find episode element');
  }

  const episodeEmbedUrl = episodeNode.attr('data-iframe');
  if (!episodeEmbedUrl) {
    throw new Error('failed to find episode starting URL');
  }

  return getVidSrcEmbeds(ctx, episodeEmbedUrl);
}