Merge pull request #47 from JorrinKievit/pr-14-v2

Added VidSrc and StreamBucket Part 2
2024-01-05 19:14:56 +01:00 · 2024-01-05 19:14:56 +01:00 · 5199213a70
parent 1d2dbcf903 4e4b405398
commit 5199213a70
11 changed files with 331 additions and 2 deletions
--- a/.eslintrc.js
+++ b/.eslintrc.js
@ -18,6 +18,8 @@ module.exports = {
  },
  plugins: ['@typescript-eslint', 'import', 'prettier'],
  rules: {
+    'no-plusplus': 'off',
+    'no-bitwise': 'off',
    'no-underscore-dangle': 'off',
    '@typescript-eslint/no-explicit-any': 'off',
    'no-console': 'off',
--- a/src/dev-cli/scraper.ts
+++ b/src/dev-cli/scraper.ts
@ -41,6 +41,7 @@ async function runBrowserScraping(
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    const page = await browser.newPage();
+    page.on('console', (message) => console.log(`${message.type().slice(0, 3).toUpperCase()} ${message.text()}`));
    await page.goto(server.resolvedUrls.local[0]);
    await page.waitForFunction('!!window.scrape', { timeout: 5000 });

--- a/src/fetchers/types.ts
+++ b/src/fetchers/types.ts
@ -4,7 +4,7 @@ export type FetcherOptions = {
  baseUrl?: string;
  headers?: Record<string, string>;
  query?: Record<string, string>;
-  method?: 'GET' | 'POST';
+  method?: 'HEAD' | 'GET' | 'POST';
  readHeaders?: string[];
  body?: Record<string, any> | string | FormData | URLSearchParams;
 };
@ -17,7 +17,7 @@ export type DefaultedFetcherOptions = {
  headers: Record<string, string>;
  query: Record<string, string>;
  readHeaders: string[];
-  method: 'GET' | 'POST';
+  method: 'HEAD' | 'GET' | 'POST';
 };

 export type FetcherResponse<T = any> = {
--- a/src/providers/all.ts
+++ b/src/providers/all.ts
@ -3,15 +3,18 @@ import { febboxHlsScraper } from '@/providers/embeds/febbox/hls';
 import { febboxMp4Scraper } from '@/providers/embeds/febbox/mp4';
 import { mixdropScraper } from '@/providers/embeds/mixdrop';
 import { mp4uploadScraper } from '@/providers/embeds/mp4upload';
+import { streambucketScraper } from '@/providers/embeds/streambucket';
 import { streamsbScraper } from '@/providers/embeds/streamsb';
 import { upcloudScraper } from '@/providers/embeds/upcloud';
 import { upstreamScraper } from '@/providers/embeds/upstream';
+import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
 import { flixhqScraper } from '@/providers/sources/flixhq/index';
 import { goMoviesScraper } from '@/providers/sources/gomovies/index';
 import { kissAsianScraper } from '@/providers/sources/kissasian/index';
 import { lookmovieScraper } from '@/providers/sources/lookmovie';
 import { remotestreamScraper } from '@/providers/sources/remotestream';
 import { showboxScraper } from '@/providers/sources/showbox/index';
+import { vidsrcScraper } from '@/providers/sources/vidsrc/index';
 import { zoechipScraper } from '@/providers/sources/zoechip';

 import { smashyStreamDScraper } from './embeds/smashystream/dued';
@ -27,6 +30,7 @@ export function gatherAllSources(): Array<Sourcerer> {
    showboxScraper,
    goMoviesScraper,
    zoechipScraper,
+    vidsrcScraper,
    lookmovieScraper,
    smashyStreamScraper,
  ];
@ -42,6 +46,8 @@ export function gatherAllEmbeds(): Array<Embed> {
    febboxMp4Scraper,
    febboxHlsScraper,
    mixdropScraper,
+    vidsrcembedScraper,
+    streambucketScraper,
    smashyStreamFScraper,
    smashyStreamDScraper,
  ];
--- a/src/providers/embeds/streambucket.ts
+++ b/src/providers/embeds/streambucket.ts
@ -0,0 +1,101 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeEmbed } from '@/providers/base';
+
+// StreamBucket makes use of https://github.com/nicxlau/hunter-php-javascript-obfuscator
+
+const hunterRegex = /eval\(function\(h,u,n,t,e,r\).*?\("(.*?)",\d*?,"(.*?)",(\d*?),(\d*?),\d*?\)\)/;
+const linkRegex = /file:"(.*?)"/;
+
+// This is a much simpler and more optimized version of the "h,u,n,t,e,r"
+// obfuscation algorithm. It's just basic chunked+mask encoding.
+// I have seen this same encoding used on some sites under the name
+// "p,l,a,y,e,r" as well.
+function decodeHunter(encoded: string, mask: string, charCodeOffset: number, delimiterOffset: number) {
+  // The encoded string is made up of 'n' number of chunks.
+  // Chunks are separated by a delimiter character, which is
+  // mask[delimiterOffset]. The same offset is also used as
+  // the exponentiation base in the charCode calculations.
+  const delimiter = mask[delimiterOffset];
+
+  // Split the 'encoded' string into chunks using the delimiter,
+  // and filter out any empty chunks.
+  const chunks = encoded.split(delimiter).filter((chunk) => chunk);
+
+  // Decode each chunk and concatenate the results to form the final 'decoded' string.
+  const decoded = chunks
+    .map((chunk) => {
+      // Chunks are in reverse order. 'reduceRight' removes the
+      // need to 'reverse' the array first
+      const charCode = chunk.split('').reduceRight((c, value, index) => {
+        // Calculate the character code for each character in the chunk.
+        // This involves finding the index of 'value' in the 'mask' and
+        // multiplying it by (delimiterOffset^position).
+        return c + mask.indexOf(value) * delimiterOffset ** (chunk.length - 1 - index);
+      }, 0);
+
+      // The actual character code is offset by the given amount
+      return String.fromCharCode(charCode - charCodeOffset);
+    })
+    .join('');
+
+  return decoded;
+}
+
+export const streambucketScraper = makeEmbed({
+  id: 'streambucket',
+  name: 'StreamBucket',
+  rank: 196,
+  // TODO - Disabled until ctx.fetcher and ctx.proxiedFetcher don't trigger bot detection
+  disabled: true,
+  async scrape(ctx) {
+    // Using the context fetchers makes the site return just the string "No bots please!".
+    // TODO - Fix this. Native fetch does not trigger this. No idea why right now.
+    const response = await fetch(ctx.url);
+    const html = await response.text();
+
+    // This is different from the above-mentioned bot detection
+    if (html.includes('captcha-checkbox')) {
+      // TODO - This doesn't use recaptcha, just really basic "image match". Maybe could automate?
+      throw new Error('StreamBucket got captchaed');
+    }
+
+    let regexResult = html.match(hunterRegex);
+
+    if (!regexResult) {
+      throw new Error('Failed to find StreamBucket hunter JavaScript');
+    }
+
+    const encoded = regexResult[1];
+    const mask = regexResult[2];
+    const charCodeOffset = Number(regexResult[3]);
+    const delimiterOffset = Number(regexResult[4]);
+
+    if (Number.isNaN(charCodeOffset)) {
+      throw new Error('StreamBucket hunter JavaScript charCodeOffset is not a valid number');
+    }
+
+    if (Number.isNaN(delimiterOffset)) {
+      throw new Error('StreamBucket hunter JavaScript delimiterOffset is not a valid number');
+    }
+
+    const decoded = decodeHunter(encoded, mask, charCodeOffset, delimiterOffset);
+
+    regexResult = decoded.match(linkRegex);
+
+    if (!regexResult) {
+      throw new Error('Failed to find StreamBucket HLS link');
+    }
+
+    return {
+      stream: [
+        {
+          id: 'primary',
+          type: 'hls',
+          playlist: regexResult[1],
+          flags: [flags.CORS_ALLOWED],
+          captions: [],
+        },
+      ],
+    };
+  },
+});
--- a/src/providers/embeds/vidsrc.ts
+++ b/src/providers/embeds/vidsrc.ts
@ -0,0 +1,55 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeEmbed } from '@/providers/base';
+
+const hlsURLRegex = /file:"(.*?)"/;
+const setPassRegex = /var pass_path = "(.*set_pass\.php.*)";/;
+
+export const vidsrcembedScraper = makeEmbed({
+  id: 'vidsrcembed', // VidSrc is both a source and an embed host
+  name: 'VidSrc',
+  rank: 197,
+  async scrape(ctx) {
+    const html = await ctx.proxiedFetcher<string>(ctx.url, {
+      headers: {
+        referer: ctx.url,
+      },
+    });
+
+    const match = html
+      .match(hlsURLRegex)?.[1]
+      ?.replace(/(\/\/\S+?=)/g, '')
+      .replace('#2', '');
+    if (!match) throw new Error('Unable to find HLS playlist');
+    const finalUrl = atob(match);
+
+    if (!finalUrl.includes('.m3u8')) throw new Error('Unable to find HLS playlist');
+
+    let setPassLink = html.match(setPassRegex)?.[1];
+    if (!setPassLink) throw new Error('Unable to find set_pass.php link');
+
+    if (setPassLink.startsWith('//')) {
+      setPassLink = `https:${setPassLink}`;
+    }
+
+    // VidSrc uses a password endpoint to temporarily whitelist the user's IP. This is called at an interval by the player.
+    // It currently has no effect on the player itself; the content plays fine without it.
+    // In the future we might have to introduce hooks for the frontend to call this endpoint.
+    await ctx.proxiedFetcher(setPassLink, {
+      headers: {
+        referer: ctx.url,
+      },
+    });
+
+    return {
+      stream: [
+        {
+          id: 'primary',
+          type: 'hls',
+          playlist: finalUrl,
+          flags: [flags.CORS_ALLOWED],
+          captions: [],
+        },
+      ],
+    };
+  },
+});
--- a/src/providers/sources/vidsrc/common.ts
+++ b/src/providers/sources/vidsrc/common.ts
@ -0,0 +1,2 @@
+export const vidsrcBase = 'https://vidsrc.me';
+export const vidsrcRCPBase = 'https://rcp.vidsrc.me';
--- a/src/providers/sources/vidsrc/index.ts
+++ b/src/providers/sources/vidsrc/index.ts
@ -0,0 +1,13 @@
+import { flags } from '@/entrypoint/utils/targets';
+import { makeSourcerer } from '@/providers/base';
+import { scrapeMovie } from '@/providers/sources/vidsrc/scrape-movie';
+import { scrapeShow } from '@/providers/sources/vidsrc/scrape-show';
+
+export const vidsrcScraper = makeSourcerer({
+  id: 'vidsrc',
+  name: 'VidSrc',
+  rank: 120,
+  flags: [flags.CORS_ALLOWED],
+  scrapeMovie,
+  scrapeShow,
+});
--- a/src/providers/sources/vidsrc/scrape-movie.ts
+++ b/src/providers/sources/vidsrc/scrape-movie.ts
@ -0,0 +1,8 @@
+import { getVidSrcMovieSources } from '@/providers/sources/vidsrc/scrape';
+import { MovieScrapeContext } from '@/utils/context';
+
+export async function scrapeMovie(ctx: MovieScrapeContext) {
+  return {
+    embeds: await getVidSrcMovieSources(ctx),
+  };
+}
--- a/src/providers/sources/vidsrc/scrape-show.ts
+++ b/src/providers/sources/vidsrc/scrape-show.ts
@ -0,0 +1,8 @@
+import { getVidSrcShowSources } from '@/providers/sources/vidsrc/scrape';
+import { ShowScrapeContext } from '@/utils/context';
+
+export async function scrapeShow(ctx: ShowScrapeContext) {
+  return {
+    embeds: await getVidSrcShowSources(ctx),
+  };
+}
--- a/src/providers/sources/vidsrc/scrape.ts
+++ b/src/providers/sources/vidsrc/scrape.ts
@ -0,0 +1,133 @@
+import { load } from 'cheerio';
+
+import { SourcererEmbed } from '@/providers/base';
+import { streambucketScraper } from '@/providers/embeds/streambucket';
+import { vidsrcembedScraper } from '@/providers/embeds/vidsrc';
+import { vidsrcBase, vidsrcRCPBase } from '@/providers/sources/vidsrc/common';
+import { MovieScrapeContext, ShowScrapeContext } from '@/utils/context';
+
+function decodeSrc(encoded: string, seed: string) {
+  let decoded = '';
+  const seedLength = seed.length;
+
+  for (let i = 0; i < encoded.length; i += 2) {
+    const byte = parseInt(encoded.substr(i, 2), 16);
+    const seedChar = seed.charCodeAt((i / 2) % seedLength);
+    decoded += String.fromCharCode(byte ^ seedChar);
+  }
+
+  return decoded;
+}
+
+async function getVidSrcEmbeds(ctx: MovieScrapeContext | ShowScrapeContext, startingURL: string) {
+  // VidSrc works by using hashes and a redirect system.
+  // The hashes are stored in the html, and VidSrc will
+  // make requests to their servers with the hash. This
+  // will trigger a 302 response with a Location header
+  // sending the user to the correct embed. To get the
+  // real embed links, we must do the same. Slow, but
+  // required
+
+  const embeds: SourcererEmbed[] = [];
+
+  let html = await ctx.proxiedFetcher<string>(startingURL, {
+    baseUrl: vidsrcBase,
+  });
+
+  let $ = load(html);
+
+  const sourceHashes = $('.server[data-hash]')
+    .toArray()
+    .map((el) => $(el).attr('data-hash'))
+    .filter((hash) => hash !== undefined);
+
+  for (const hash of sourceHashes) {
+    html = await ctx.proxiedFetcher<string>(`/rcp/${hash}`, {
+      baseUrl: vidsrcRCPBase,
+      headers: {
+        referer: vidsrcBase,
+      },
+    });
+
+    $ = load(html);
+    const encoded = $('#hidden').attr('data-h');
+    const seed = $('body').attr('data-i');
+
+    if (!encoded || !seed) {
+      throw new Error('Failed to find encoded iframe src');
+    }
+
+    let redirectURL = decodeSrc(encoded, seed);
+    if (redirectURL.startsWith('//')) {
+      redirectURL = `https:${redirectURL}`;
+    }
+
+    const { finalUrl } = await ctx.proxiedFetcher.full(redirectURL, {
+      method: 'HEAD',
+      headers: {
+        referer: vidsrcBase,
+      },
+    });
+
+    const embed: SourcererEmbed = {
+      embedId: '',
+      url: finalUrl,
+    };
+
+    const parsedUrl = new URL(finalUrl);
+
+    switch (parsedUrl.host) {
+      case 'vidsrc.stream':
+        embed.embedId = vidsrcembedScraper.id;
+        break;
+      case 'streambucket.net':
+        embed.embedId = streambucketScraper.id;
+        break;
+      case '2embed.cc':
+      case 'www.2embed.cc':
+        // Just ignore this. This embed just sources from other embeds we can scrape as a 'source'
+        break;
+      case 'player-cdn.com':
+        // Just ignore this. This embed streams video over a custom WebSocket connection
+        break;
+      default:
+        throw new Error(`Failed to find VidSrc embed source for ${finalUrl}`);
+    }
+
+    // Since some embeds are ignored on purpose, check if a valid one was found
+    if (embed.embedId !== '') {
+      embeds.push(embed);
+    }
+  }
+
+  return embeds;
+}
+
+export async function getVidSrcMovieSources(ctx: MovieScrapeContext) {
+  return getVidSrcEmbeds(ctx, `/embed/${ctx.media.tmdbId}`);
+}
+
+export async function getVidSrcShowSources(ctx: ShowScrapeContext) {
+  // VidSrc will always default to season 1 episode 1
+  // no matter what embed URL is used. It sends back
+  // a list of ALL the show's episodes, in order, for
+  // all seasons. To get the real embed URL, we have
+  // to parse it from the response.
+  const html = await ctx.proxiedFetcher<string>(`/embed/${ctx.media.tmdbId}`, {
+    baseUrl: vidsrcBase,
+  });
+
+  const $ = load(html);
+
+  const episodeElement = $(`.ep[data-s="${ctx.media.season.number}"][data-e="${ctx.media.episode.number}"]`).first();
+  if (episodeElement.length === 0) {
+    throw new Error('failed to find episode element');
+  }
+
+  const startingURL = episodeElement.attr('data-iframe');
+  if (!startingURL) {
+    throw new Error('failed to find episode starting URL');
+  }
+
+  return getVidSrcEmbeds(ctx, startingURL);
+}