Files
LetsBeBiz-Site/src/lib/site-analysis.ts

317 lines
9.9 KiB
TypeScript
Raw Normal View History

import * as cheerio from 'cheerio'
// ─── Types ───────────────────────────────────────────────────────────────────
/**
 * Technologies inferred for a page by `detectStack` from its HTML and response
 * headers. Each single-valued field is `null` when no known signature matched.
 */
export interface TechStack {
/** Content platform name such as 'WordPress' or 'Shopify'. */
cms: string | null
/** Front-end framework name such as 'Next.js' or 'Vue'. */
framework: string | null
/** Store platform name such as 'WooCommerce' or 'Magento'. */
ecommerce: string | null
/** Every analytics/tracking tool whose signature was found; empty when none. */
analytics: string[]
/** Hosting or edge provider inferred from response headers, e.g. 'Cloudflare'. */
hosting: string | null
}
/**
 * Performance figures that `fetchPageSpeed` reads from a PageSpeed Insights
 * (mobile strategy) response.
 *
 * NOTE(review): the audit values are Lighthouse `numericValue`s copied as-is;
 * the timing ones are presumably milliseconds — confirm against the API docs.
 */
export interface PerformanceMetrics {
/** Lighthouse `performance` category score multiplied by 100 and rounded. */
score: number
/** `numericValue` of the `first-contentful-paint` audit. */
fcp: number
/** `numericValue` of the `largest-contentful-paint` audit. */
lcp: number
/** `numericValue` of the `cumulative-layout-shift` audit. */
cls: number
/** `numericValue` of the `total-blocking-time` audit. */
tbt: number
/** `numericValue` of the `speed-index` audit. */
speedIndex: number
}
/**
 * Result returned by `analyzeSite`. When the URL is rejected or the page cannot
 * be fetched, `fetchError` carries the reason and every other derived field
 * keeps its empty default.
 */
export interface SiteAnalysis {
/** The URL after normalisation (the address that was actually requested). */
url: string
/** ISO-8601 timestamp taken when the analysis started. */
fetchedAt: string
/** Text of the first `<title>` element, or `null`. */
title: string | null
/** `content` of `<meta name="description">`, or `null`. */
description: string | null
/** `content` of `<meta name="theme-color">`, or `null`. */
themeColor: string | null
/** Up to 8 distinct colour literals found in `<style>` blocks and inline `style` attributes. */
primaryColors: string[]
/** Text of up to three `<h1>` and up to three `<h2>` elements, in document order. */
headingStructure: { h1: string[]; h2: string[] }
/** Up to 10 non-empty link texts from the first `<nav>` element. */
navLinks: string[]
/** `true` when the page contains at least one `<form>`. */
hasForms: boolean
/** Detected technologies; `null` when the page was not fetched or detection failed. */
techStack: TechStack | null
/** PageSpeed metrics; `null` when they could not be obtained. */
performance: PerformanceMetrics | null
/** Human-readable failure reason, or `null` when the page was fetched. */
fetchError: string | null
}
// ─── Internal result types ────────────────────────────────────────────────────
/**
 * The fields of `SiteAnalysis` that `parseHtml` derives from the markup alone
 * (no headers, no network). Field meanings match the same-named fields there.
 */
interface ParsedHtml {
/** Text of the first `<title>` element, or `null`. */
title: string | null
/** `content` of `<meta name="description">`, or `null`. */
description: string | null
/** `content` of `<meta name="theme-color">`, or `null`. */
themeColor: string | null
/** Up to 8 distinct colour literals found in embedded and inline CSS. */
primaryColors: string[]
/** Text of up to three `<h1>` and up to three `<h2>` elements. */
headingStructure: { h1: string[]; h2: string[] }
/** Up to 10 non-empty link texts from the first `<nav>` element. */
navLinks: string[]
/** `true` when at least one `<form>` element exists. */
hasForms: boolean
}
// ─── HTML parser ─────────────────────────────────────────────────────────────
/**
 * Extracts title, meta information, colour literals, headings, navigation
 * labels and form presence from a page's raw HTML.
 *
 * @param html Raw HTML document text.
 * @returns The parsed summary. Text fields are `null` when the element or
 *          attribute is missing or blank; list fields are capped (8 colours,
 *          3 headings per level, 10 nav links).
 */
function parseHtml(html: string): ParsedHtml {
  const $ = cheerio.load(html)

  // `|| null` (not `??`) so blank values such as content="" are reported as
  // absent, consistently for all three fields.
  const title = $('title').first().text().trim() || null
  const description =
    $('meta[name="description"]').attr('content')?.trim() || null
  const themeColor =
    $('meta[name="theme-color"]').attr('content')?.trim() || null

  // Hex (#rgb … #rrggbbaa) and rgb()/rgba() literals. This is a textual
  // heuristic, not a CSS parser.
  const colorPattern = /(#[0-9a-fA-F]{3,8})|rgb[a]?\(\s*\d[\d\s,./%]*\)/g
  const colorSet = new Set<string>()
  const collectColors = (css: string): void => {
    for (const color of css.match(colorPattern) ?? []) colorSet.add(color)
  }
  $('style').each((_, el) => collectColors($(el).text()))
  $('[style]').each((_, el) => collectColors($(el).attr('style') ?? ''))
  // Set iteration follows insertion order, so this keeps the first 8 distinct
  // colours in document order.
  const primaryColors = [...colorSet].slice(0, 8)

  // In the loops below, returning `false` from a cheerio `.each` callback
  // stops the iteration, so the DOM walk ends as soon as a cap is reached.
  // Blank entries are skipped so they cannot use up the cap.
  const h1: string[] = []
  $('h1').each((_, el) => {
    const text = $(el).text().trim()
    if (text) h1.push(text)
    return h1.length < 3
  })

  const h2: string[] = []
  $('h2').each((_, el) => {
    const text = $(el).text().trim()
    if (text) h2.push(text)
    return h2.length < 3
  })

  const navLinks: string[] = []
  $('nav').first().find('a').each((_, el) => {
    const text = $(el).text().trim()
    if (text) navLinks.push(text)
    return navLinks.length < 10
  })

  const hasForms = $('form').length > 0

  return { title, description, themeColor, primaryColors, headingStructure: { h1, h2 }, navLinks, hasForms }
}
// ─── Tech stack detector ──────────────────────────────────────────────────────
/**
 * Heuristically fingerprints the technologies behind a page by looking for
 * well-known substrings in its HTML and well-known response headers.
 *
 * Within each single-valued category (CMS, framework, e-commerce, hosting) the
 * first matching signature wins; analytics tools are all collected.
 *
 * @param html    Raw HTML of the page.
 * @param headers Response headers; names and values are compared
 *                case-insensitively.
 * @returns The detected stack, with `null` / `[]` where nothing matched.
 */
function detectStack(html: string, headers: Record<string, string>): TechStack {
  const h = html.toLowerCase()

  // Lower-case header names and values once so every check is a plain lookup.
  const headerLower: Record<string, string> = {}
  for (const [name, value] of Object.entries(headers)) {
    headerLower[name.toLowerCase()] = value.toLowerCase()
  }
  const headerNames = Object.keys(headerLower)

  // CMS
  let cms: string | null = null
  if (
    h.includes('wp-content/') ||
    (headerLower['x-powered-by']?.includes('php') && h.includes('wp-json'))
  ) {
    cms = 'WordPress'
  } else if (h.includes('cdn.shopify.com') || h.includes('shopify.theme')) {
    cms = 'Shopify'
  } else if (
    h.includes('wixsite.com') ||
    headerNames.some(name => name.includes('x-wix'))
  ) {
    cms = 'Wix'
  } else if (h.includes('static1.squarespace.com') || h.includes('squarespace-cdn')) {
    cms = 'Squarespace'
  } else if (h.includes('webflow.io') || h.includes('data-wf-site')) {
    cms = 'Webflow'
  } else if (h.includes('/media/jui/') || h.includes('joomla')) {
    cms = 'Joomla'
  } else if (
    h.includes('/sites/default/files/') ||
    headerLower['x-generator']?.includes('drupal')
  ) {
    cms = 'Drupal'
  } else if (h.includes('ghost.io') || h.includes('content="ghost')) {
    cms = 'Ghost'
  }

  // Framework: meta-frameworks are checked before the libraries they build on.
  let framework: string | null = null
  if (h.includes('__next_data__') || h.includes('_next/static')) {
    framework = 'Next.js'
  } else if (h.includes('__nuxt__') || h.includes('_nuxt/')) {
    framework = 'Nuxt'
  } else if (h.includes('data-reactroot') || h.includes('react-root')) {
    framework = 'React'
  } else if (h.includes('data-v-')) {
    framework = 'Vue'
  } else if (h.includes('ng-version')) {
    framework = 'Angular'
  }

  // Ecommerce
  let ecommerce: string | null = null
  if (h.includes('woocommerce')) {
    ecommerce = 'WooCommerce'
  } else if (h.includes('prestashop')) {
    ecommerce = 'PrestaShop'
  } else if (h.includes('mage.cookies') || h.includes('skin/frontend')) {
    ecommerce = 'Magento'
  }

  // Analytics (collect all)
  const analytics: string[] = []
  // GA4 measurement IDs look like "G-XXXXXXXXXX" and appear in
  // `gtag/js?id=G-…` and `gtag('config', 'G-…')`. The match runs against the
  // original-case HTML and is case-sensitive so lower-case class names such as
  // "g-recaptcha" are not mistaken for an ID.
  if (h.includes('gtag') && /\bG-[A-Z0-9]{4,}\b/.test(html)) {
    analytics.push('Google Analytics 4')
  }
  if (h.includes('googletagmanager.com')) {
    analytics.push('Google Tag Manager')
  }
  if (h.includes('hotjar.com')) {
    analytics.push('Hotjar')
  }
  if (h.includes('matomo.js') || h.includes('piwik.js')) {
    analytics.push('Matomo')
  }
  if (h.includes('fbq(')) {
    analytics.push('Facebook Pixel')
  }

  // Hosting (from response headers only)
  let hosting: string | null = null
  if ('cf-ray' in headerLower) {
    hosting = 'Cloudflare'
  } else if (headerNames.some(name => name.startsWith('x-vercel'))) {
    hosting = 'Vercel'
  } else if ('x-nf-request-id' in headerLower) {
    hosting = 'Netlify'
  } else if (
    'wpe-backend' in headerLower ||
    headerLower['server']?.includes('wpe')
  ) {
    hosting = 'WP Engine'
  }

  return { cms, framework, ecommerce, analytics, hosting }
}
// ─── PageSpeed fetcher ────────────────────────────────────────────────────────
/**
 * Queries the Google PageSpeed Insights v5 API (mobile strategy) for `url` and
 * extracts the headline Lighthouse metrics.
 *
 * This is best-effort: any network failure, non-2xx response, malformed
 * payload, or missing/non-numeric metric yields `null` instead of throwing, so
 * a partially populated `PerformanceMetrics` is never returned.
 *
 * @param url Absolute URL of the page to audit.
 * @returns The metrics, or `null` when they could not be obtained in full.
 */
async function fetchPageSpeed(url: string): Promise<PerformanceMetrics | null> {
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null

  // Walks `path` through nested objects, yielding `undefined` as soon as a
  // level is missing or not an object.
  const dig = (root: unknown, ...path: string[]): unknown => {
    let node: unknown = root
    for (const key of path) {
      if (!isRecord(node)) return undefined
      node = node[key]
    }
    return node
  }

  const finiteOrNull = (value: unknown): number | null =>
    typeof value === 'number' && Number.isFinite(value) ? value : null

  try {
    const apiUrl = `https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=${encodeURIComponent(url)}&strategy=mobile`
    const res = await fetch(apiUrl)
    // Error responses carry no lighthouseResult, so there is nothing to parse.
    if (!res.ok) return null

    const json: unknown = await res.json()
    const lighthouse = dig(json, 'lighthouseResult')
    const auditValue = (auditId: string): number | null =>
      finiteOrNull(dig(lighthouse, 'audits', auditId, 'numericValue'))

    // The score may be null in the payload; treat that as "no data" rather
    // than letting it turn into a misleading 0.
    const rawScore = finiteOrNull(dig(lighthouse, 'categories', 'performance', 'score'))
    const fcp = auditValue('first-contentful-paint')
    const lcp = auditValue('largest-contentful-paint')
    const cls = auditValue('cumulative-layout-shift')
    const tbt = auditValue('total-blocking-time')
    const speedIndex = auditValue('speed-index')

    if (
      rawScore === null ||
      fcp === null ||
      lcp === null ||
      cls === null ||
      tbt === null ||
      speedIndex === null
    ) {
      return null
    }

    return { score: Math.round(rawScore * 100), fcp, lcp, cls, tbt, speedIndex }
  } catch {
    // Deliberate best-effort: performance data is optional for callers.
    return null
  }
}
// ─── URL validation ───────────────────────────────────────────────────────────
/**
 * Trims user input and makes sure it carries a scheme, defaulting to https.
 *
 * - Blank input is returned as an empty string so the caller's scheme check
 *   can reject it.
 * - Protocol-relative input (`//host/path`) is given the `https:` scheme.
 * - Input that already has an explicit `scheme://` is returned untouched, even
 *   when the scheme is not http(s). Prefixing it with `https://` would hide
 *   the unsupported scheme from later validation.
 * - Anything else (e.g. `example.com`, `localhost:3000/x`) gets `https://`
 *   prepended.
 *
 * @param input Raw URL-ish text.
 * @returns The normalised string; not guaranteed to be a valid URL.
 */
function normalizeUrl(input: string): string {
  const trimmed = input.trim()
  if (trimmed === '') return trimmed
  if (trimmed.startsWith('//')) return `https:${trimmed}`
  // Require "://" so that "host:port" is not mistaken for a scheme.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) return trimmed
  return `https://${trimmed}`
}
/**
 * Reports whether `input` begins with an `http://` or `https://` scheme,
 * compared case-insensitively. Only the prefix is inspected; the rest of the
 * string is not validated.
 */
function isHttpUrl(input: string): boolean {
  const lowered = input.toLowerCase()
  return lowered.startsWith('http://') || lowered.startsWith('https://')
}
// ─── Main export ──────────────────────────────────────────────────────────────
/**
 * Fetches a page and summarises its content, technology stack and PageSpeed
 * performance.
 *
 * The function does not throw for expected failures: an unsupported URL or a
 * failed/timed-out page fetch is reported through `fetchError`, and a failure
 * in any individual analysis step leaves only that step's fields at their
 * empty defaults.
 *
 * @param url Address to analyse; a missing scheme is normalised by `normalizeUrl`.
 * @returns The analysis. `fetchError` is `null` when the page was retrieved.
 */
export async function analyzeSite(url: string): Promise<SiteAnalysis> {
  const fetchTimeoutMs = 5000

  const normalizedUrl = normalizeUrl(url)
  const fetchedAt = new Date().toISOString()
  const base: SiteAnalysis = {
    url: normalizedUrl,
    fetchedAt,
    title: null,
    description: null,
    themeColor: null,
    primaryColors: [],
    headingStructure: { h1: [], h2: [] },
    navLinks: [],
    hasForms: false,
    techStack: null,
    performance: null,
    fetchError: null,
  }

  if (!isHttpUrl(normalizedUrl)) {
    return { ...base, fetchError: 'Invalid URL: only http and https schemes are supported.' }
  }

  // NOTE(review): if `url` comes from untrusted users and this runs
  // server-side, the request below can reach internal hosts (SSRF). Confirm
  // whether the caller restricts targets.
  let html: string
  const headers: Record<string, string> = {}
  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), fetchTimeoutMs)
  try {
    const response = await fetch(normalizedUrl, {
      signal: controller.signal,
      headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SiteAnalyzer/1.0)' },
    })
    // The timer stays armed while the body downloads, so a server that sends
    // headers and then stalls cannot hang the analysis.
    html = await response.text()
    response.headers.forEach((value, key) => {
      headers[key.toLowerCase()] = value
    })
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err)
    return { ...base, fetchError: message }
  } finally {
    // Always release the timer, including when fetch rejects.
    clearTimeout(timeout)
  }

  // The synchronous steps run inside async thunks so that a throw becomes a
  // rejected promise for allSettled to absorb. Calling them directly in the
  // array literal would throw before allSettled is ever invoked.
  const pageHtml = html
  const [htmlResult, stackResult, perfResult] = await Promise.allSettled([
    (async () => parseHtml(pageHtml))(),
    (async () => detectStack(pageHtml, headers))(),
    fetchPageSpeed(normalizedUrl),
  ])

  return {
    ...base,
    // A failed parse contributes nothing, leaving the defaults from `base`.
    ...(htmlResult.status === 'fulfilled' ? htmlResult.value : {}),
    techStack: stackResult.status === 'fulfilled' ? stackResult.value : null,
    performance: perfResult.status === 'fulfilled' ? perfResult.value : null,
  }
}