import * as cheerio from 'cheerio'

// ─── Types ───────────────────────────────────────────────────────────────────

/** Technologies inferred from a page's markup and response headers. */
export interface TechStack {
  cms: string | null
  framework: string | null
  ecommerce: string | null
  /** Every analytics product whose marker was found (may be empty). */
  analytics: string[]
  hosting: string | null
}

/**
 * Metrics read from a PageSpeed Insights (Lighthouse) mobile run.
 *
 * `score` is the performance category score multiplied by 100 and rounded.
 * The other fields are the raw `numericValue` of the corresponding Lighthouse
 * audit (presumably milliseconds for the timing audits — confirm against the
 * PageSpeed API documentation).
 */
export interface PerformanceMetrics {
  score: number
  fcp: number
  lcp: number
  cls: number
  tbt: number
  speedIndex: number
}

/** Result of {@link analyzeSite}. `fetchError` is non-null when the page could not be fetched. */
export interface SiteAnalysis {
  url: string
  fetchedAt: string
  title: string | null
  description: string | null
  themeColor: string | null
  primaryColors: string[]
  headingStructure: { h1: string[]; h2: string[] }
  navLinks: string[]
  hasForms: boolean
  techStack: TechStack | null
  performance: PerformanceMetrics | null
  fetchError: string | null
}

// ─── Internal result types ────────────────────────────────────────────────────

/** The subset of {@link SiteAnalysis} that is derived purely from the HTML document. */
type ParsedHtml = Pick<
  SiteAnalysis,
  | 'title'
  | 'description'
  | 'themeColor'
  | 'primaryColors'
  | 'headingStructure'
  | 'navLinks'
  | 'hasForms'
>

/** Body text plus response headers of a fetched page. */
interface FetchedPage {
  html: string
  headers: Record<string, string>
}

// ─── Constants ────────────────────────────────────────────────────────────────

/** Upper bound, in milliseconds, for fetching the page (headers and body). */
const FETCH_TIMEOUT_MS = 5000

const USER_AGENT = 'Mozilla/5.0 (compatible; SiteAnalyzer/1.0)'

/**
 * Matches CSS hex colours of a valid length (3, 4, 6 or 8 digits) and
 * `rgb()` / `rgba()` functions. The negative lookahead stops the hex branch
 * from matching the prefix of an id selector such as `#address` or `#fade-in`.
 * Only used with `String.prototype.match`, so the shared `g` flag is safe.
 */
const COLOR_PATTERN =
  /#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![\w-])|rgb[a]?\(\s*\d[\d\s,./%]*\)/g

const MAX_COLORS = 8
const MAX_HEADINGS = 3
const MAX_NAV_LINKS = 10

const HTTP_SCHEME = /^https?:\/\//i
const ANY_SCHEME = /^[a-z][a-z0-9+.-]*:\/\//i

/** GA4 measurement id as it appears in the gtag snippet, e.g. `id=G-AB12CD34EF`. */
const GA4_MEASUREMENT_ID = /\bG-[A-Z0-9]{4,}\b/

// ─── HTML parser ─────────────────────────────────────────────────────────────

/**
 * Extracts title, meta information, colours, headings, navigation labels and
 * form presence from an HTML document.
 *
 * @param html Raw HTML source.
 * @returns The document-derived fields; empty or missing values become `null` or `[]`.
 */
function parseHtml(html: string): ParsedHtml {
  const $ = cheerio.load(html)

  // Empty strings are treated as "absent" for every text field.
  const metaContent = (name: string): string | null =>
    $(`meta[name="${name}"]`).attr('content')?.trim() || null

  const title = $('title').first().text().trim() || null
  const description = metaContent('description')
  const themeColor = metaContent('theme-color')

  // Colours are gathered from <style> blocks first, then inline style
  // attributes; the Set keeps first-seen order and drops duplicates.
  const colorSet = new Set<string>()
  const collectColors = (css: string): void => {
    for (const color of css.match(COLOR_PATTERN) ?? []) colorSet.add(color)
  }
  $('style').each((_, el) => {
    collectColors($(el).text())
  })
  $('[style]').each((_, el) => {
    collectColors($(el).attr('style') ?? '')
  })
  const primaryColors = [...colorSet].slice(0, MAX_COLORS)

  const firstTexts = (selector: string): string[] =>
    $(selector)
      .slice(0, MAX_HEADINGS)
      .toArray()
      .map(el => $(el).text().trim())

  // Only the first <nav> is inspected; anchors without text are skipped.
  const navLinks = $('nav')
    .first()
    .find('a')
    .toArray()
    .map(el => $(el).text().trim())
    .filter(text => text.length > 0)
    .slice(0, MAX_NAV_LINKS)

  return {
    title,
    description,
    themeColor,
    primaryColors,
    headingStructure: { h1: firstTexts('h1'), h2: firstTexts('h2') },
    navLinks,
    hasForms: $('form').length > 0,
  }
}

// ─── Tech stack detector ──────────────────────────────────────────────────────

/** Returns the first CMS whose marker appears in the lower-cased HTML or headers. */
function detectCms(h: string, headerLower: Record<string, string>): string | null {
  if (
    h.includes('wp-content/') ||
    (headerLower['x-powered-by']?.includes('php') && h.includes('wp-json'))
  ) {
    return 'WordPress'
  }
  if (h.includes('cdn.shopify.com') || h.includes('shopify.theme')) return 'Shopify'
  if (h.includes('wixsite.com') || Object.keys(headerLower).some(k => k.includes('x-wix'))) {
    return 'Wix'
  }
  if (h.includes('static1.squarespace.com') || h.includes('squarespace-cdn')) return 'Squarespace'
  if (h.includes('webflow.io') || h.includes('data-wf-site')) return 'Webflow'
  if (h.includes('/media/jui/') || h.includes('joomla')) return 'Joomla'
  if (h.includes('/sites/default/files/') || headerLower['x-generator']?.includes('drupal')) {
    return 'Drupal'
  }
  if (h.includes('ghost.io') || h.includes('content="ghost')) return 'Ghost'
  return null
}

/** Returns the first front-end framework whose marker appears in the lower-cased HTML. */
function detectFramework(h: string): string | null {
  // Meta-frameworks are checked before the libraries they are built on.
  if (h.includes('__next_data__') || h.includes('_next/static')) return 'Next.js'
  if (h.includes('__nuxt__') || h.includes('_nuxt/')) return 'Nuxt'
  if (h.includes('data-reactroot') || h.includes('react-root')) return 'React'
  if (h.includes('data-v-')) return 'Vue'
  if (h.includes('ng-version')) return 'Angular'
  return null
}

/** Returns the first e-commerce platform whose marker appears in the lower-cased HTML. */
function detectEcommerce(h: string): string | null {
  if (h.includes('woocommerce')) return 'WooCommerce'
  if (h.includes('prestashop')) return 'PrestaShop'
  if (h.includes('mage.cookies') || h.includes('skin/frontend')) return 'Magento'
  return null
}

/** Collects every analytics product whose marker appears in the page. */
function detectAnalytics(h: string, html: string): string[] {
  const analytics: string[] = []
  // The measurement id is matched against the original-case HTML because the
  // pattern is case-sensitive (`G-` followed by upper-case letters/digits).
  if (h.includes('gtag') && GA4_MEASUREMENT_ID.test(html)) analytics.push('Google Analytics 4')
  if (h.includes('googletagmanager.com')) analytics.push('Google Tag Manager')
  if (h.includes('hotjar.com')) analytics.push('Hotjar')
  if (h.includes('matomo.js') || h.includes('piwik.js')) analytics.push('Matomo')
  if (h.includes('fbq(')) analytics.push('Facebook Pixel')
  return analytics
}

/** Returns the first hosting provider identified by the lower-cased response headers. */
function detectHosting(headerLower: Record<string, string>): string | null {
  if ('cf-ray' in headerLower) return 'Cloudflare'
  if (Object.keys(headerLower).some(k => k.startsWith('x-vercel'))) return 'Vercel'
  if ('x-nf-request-id' in headerLower) return 'Netlify'
  if ('wpe-backend' in headerLower || headerLower['server']?.includes('wpe')) return 'WP Engine'
  return null
}

/**
 * Infers the technology stack from substring markers in the HTML and from
 * response headers. All matching is case-insensitive (both inputs are
 * lower-cased first), except the GA4 measurement-id pattern.
 *
 * @param html Raw HTML source.
 * @param headers Response headers; names and values may be in any case.
 */
function detectStack(html: string, headers: Record<string, string>): TechStack {
  const h = html.toLowerCase()
  const headerLower: Record<string, string> = {}
  for (const [name, value] of Object.entries(headers)) {
    headerLower[name.toLowerCase()] = value.toLowerCase()
  }

  return {
    cms: detectCms(h, headerLower),
    framework: detectFramework(h),
    ecommerce: detectEcommerce(h),
    analytics: detectAnalytics(h, html),
    hosting: detectHosting(headerLower),
  }
}

// ─── PageSpeed fetcher ────────────────────────────────────────────────────────

/** Narrows an unknown value to a plain indexable object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Reads `container[key]` and returns it only when it is a finite number. */
function readNumber(container: unknown, key: string): number | null {
  if (!isRecord(container)) return null
  const value = container[key]
  return typeof value === 'number' && Number.isFinite(value) ? value : null
}

/**
 * Runs the PageSpeed Insights v5 API (mobile strategy) for `url`.
 *
 * Best-effort: any network error, non-2xx response, or response that lacks
 * one of the expected numeric fields yields `null` instead of throwing.
 *
 * @param url Page to audit.
 * @returns The extracted metrics, or `null` when they could not be obtained.
 */
async function fetchPageSpeed(url: string): Promise<PerformanceMetrics | null> {
  try {
    const apiUrl = `https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=${encodeURIComponent(url)}&strategy=mobile`
    const res = await fetch(apiUrl)
    if (!res.ok) return null

    const json: unknown = await res.json()
    const lighthouse = isRecord(json) ? json['lighthouseResult'] : undefined
    if (!isRecord(lighthouse)) return null

    const categories = lighthouse['categories']
    const audits = lighthouse['audits']
    if (!isRecord(categories) || !isRecord(audits)) return null

    const rawScore = readNumber(categories['performance'], 'score')
    const fcp = readNumber(audits['first-contentful-paint'], 'numericValue')
    const lcp = readNumber(audits['largest-contentful-paint'], 'numericValue')
    const cls = readNumber(audits['cumulative-layout-shift'], 'numericValue')
    const tbt = readNumber(audits['total-blocking-time'], 'numericValue')
    const speedIndex = readNumber(audits['speed-index'], 'numericValue')

    // Report nothing rather than a partially-filled object typed as complete.
    if (
      rawScore === null ||
      fcp === null ||
      lcp === null ||
      cls === null ||
      tbt === null ||
      speedIndex === null
    ) {
      return null
    }

    return { score: Math.round(rawScore * 100), fcp, lcp, cls, tbt, speedIndex }
  } catch {
    return null
  }
}

// ─── URL validation ───────────────────────────────────────────────────────────

/**
 * Trims `input` and prefixes `https://` when it carries no `scheme://` at all.
 * Inputs that already name a scheme (http, https, or anything else) are
 * returned unchanged so that {@link isHttpUrl} can reject unsupported ones.
 */
function normalizeUrl(input: string): string {
  const trimmed = input.trim()
  return ANY_SCHEME.test(trimmed) ? trimmed : `https://${trimmed}`
}

/** True when `input` starts with `http://` or `https://` (case-insensitive). */
function isHttpUrl(input: string): boolean {
  return HTTP_SCHEME.test(input)
}

// ─── Main export ──────────────────────────────────────────────────────────────

/**
 * Fetches `url` and returns its body and headers. A single timer bounds the
 * whole exchange, including reading the body, and is always cleared.
 *
 * @throws Whatever `fetch` / `Response.text` throw, including the abort error on timeout.
 */
async function fetchDocument(url: string): Promise<FetchedPage> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS)
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: { 'User-Agent': USER_AGENT },
    })
    const html = await response.text()
    const headers: Record<string, string> = {}
    response.headers.forEach((value, key) => {
      headers[key.toLowerCase()] = value
    })
    return { html, headers }
  } finally {
    clearTimeout(timer)
  }
}

/** Runs `fn` and converts a thrown exception into `null`. */
function attempt<T>(fn: () => T): T | null {
  try {
    return fn()
  } catch {
    return null
  }
}

/**
 * Fetches a page and reports its metadata, detected tech stack and PageSpeed
 * metrics.
 *
 * The function does not reject for fetch or analysis failures: a failed page
 * fetch is reported through `fetchError`, and a failure in any individual
 * analysis step leaves that part of the result at its empty default.
 *
 * NOTE(review): the URL is fetched as given; if callers pass untrusted input,
 * confirm that requests to internal addresses are blocked upstream.
 *
 * @param url Site address; `https://` is assumed when no scheme is given.
 * @returns The analysis, with `fetchError` set when the page could not be fetched
 *   or the URL uses a scheme other than http/https.
 */
export async function analyzeSite(url: string): Promise<SiteAnalysis> {
  const normalizedUrl = normalizeUrl(url)
  const fetchedAt = new Date().toISOString()

  const base: SiteAnalysis = {
    url: normalizedUrl,
    fetchedAt,
    title: null,
    description: null,
    themeColor: null,
    primaryColors: [],
    headingStructure: { h1: [], h2: [] },
    navLinks: [],
    hasForms: false,
    techStack: null,
    performance: null,
    fetchError: null,
  }

  if (!isHttpUrl(normalizedUrl)) {
    return { ...base, fetchError: 'Invalid URL: only http and https schemes are supported.' }
  }

  let page: FetchedPage
  try {
    page = await fetchDocument(normalizedUrl)
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    return { ...base, fetchError: message }
  }
  const { html, headers } = page

  // Start the slow network call first so the synchronous parsing overlaps it.
  // fetchPageSpeed never rejects; it resolves to null on failure.
  const perfPromise = fetchPageSpeed(normalizedUrl)
  const parsed = attempt(() => parseHtml(html))
  const stack = attempt(() => detectStack(html, headers))
  const perf = await perfPromise

  return {
    ...base,
    title: parsed?.title ?? null,
    description: parsed?.description ?? null,
    themeColor: parsed?.themeColor ?? null,
    primaryColors: parsed?.primaryColors ?? [],
    headingStructure: parsed?.headingStructure ?? { h1: [], h2: [] },
    navLinks: parsed?.navLinks ?? [],
    hasForms: parsed?.hasForms ?? false,
    techStack: stack,
    performance: perf,
  }
}