/**
 * Web Sentiment Analyzer
 *
 * Analyzes the sentiment of web content using a combination of:
 * - Dictionary-based sentiment analysis
 * - Emotion categorization
 * - Intensity analysis
 * - Web content extraction
 * - Metadata parsing
 *
 * Architecture overview:
 * - createWebSentimentAnalyzer is a factory that returns analyzer instances
 * - Dictionaries are configurable at creation time and through add* methods
 * - Content fetching, text analysis and report formatting are separate functions
 * - A small CLI processes one or more URLs passed on the command line
 *
 * Known limitations:
 * - Matching is done one token at a time, so multi-word dictionary entries
 *   (e.g. 'without a doubt', 'by no means', 'looking forward') and the bare
 *   "n't" negator never match.
 * - Hyphens are stripped during tokenization, so hyphenated entries such as
 *   'team-oriented' never match either.
 *
 * @module sentiment-analyzer
 */

import { JSDOM } from 'jsdom';

/** Default words indicating positive sentiment. */
const DEFAULT_POSITIVE_WORDS = [
  // Emotional positives
  'love', 'joy', 'happy', 'excited', 'peaceful', 'wonderful', 'fantastic',
  'delighted', 'pleased', 'glad', 'cheerful', 'content', 'satisfied',
  'grateful', 'thankful', 'blessed', 'optimistic', 'hopeful',
  // Quality positives
  'excellent', 'outstanding', 'superb', 'magnificent', 'brilliant',
  'exceptional', 'perfect', 'remarkable', 'spectacular', 'impressive',
  'incredible', 'amazing', 'extraordinary', 'marvelous', 'wonderful',
  // Performance positives
  'efficient', 'effective', 'reliable', 'innovative', 'productive',
  'successful', 'accomplished', 'achieved', 'improved', 'enhanced',
  'optimized', 'streamlined', 'breakthrough', 'revolutionary',
  // Relationship positives
  'friendly', 'helpful', 'supportive', 'kind', 'generous', 'caring',
  'compassionate', 'thoughtful', 'considerate', 'engaging', 'collaborative',
  // Experience positives
  'enjoyable', 'fun', 'entertaining', 'engaging', 'interesting',
  'fascinating', 'captivating', 'inspiring', 'motivating', 'enriching',
  'rewarding',
  // Growth positives
  'growing', 'improving', 'developing', 'advancing', 'progressing',
  'evolving', 'flourishing', 'thriving', 'prospering', 'succeeding',
];

/** Default words indicating negative sentiment. */
const DEFAULT_NEGATIVE_WORDS = [
  // Emotional negatives
  'hate', 'angry', 'sad', 'upset', 'frustrated', 'disappointed', 'anxious',
  'worried', 'stressed', 'depressed', 'miserable', 'unhappy', 'distressed',
  'irritated', 'annoyed', 'furious', 'outraged', 'bitter',
  // Quality negatives
  'poor', 'bad', 'terrible', 'horrible', 'awful', 'dreadful', 'inferior',
  'mediocre', 'subpar', 'unacceptable', 'disappointing', 'inadequate',
  'deficient', 'flawed', 'defective',
  // Performance negatives
  'inefficient', 'ineffective', 'unreliable', 'problematic', 'failing',
  'broken', 'malfunctioning', 'corrupted', 'crashed', 'buggy', 'error',
  'failed', 'unsuccessful', 'unproductive',
  // Relationship negatives
  'hostile', 'unfriendly', 'unhelpful', 'rude', 'mean', 'cruel', 'harsh',
  'inconsiderate', 'selfish', 'aggressive', 'confrontational', 'toxic',
  // Experience negatives
  'boring', 'dull', 'tedious', 'monotonous', 'uninteresting', 'tiresome',
  'exhausting', 'frustrating', 'confusing', 'complicated', 'difficult',
  // Decline negatives
  'declining', 'deteriorating', 'worsening', 'failing', 'regressing',
  'degrading', 'diminishing', 'decreasing', 'falling', 'shrinking',
];

/** Default [word, multiplier] pairs that scale the next sentiment word. */
const DEFAULT_INTENSIFIERS = [
  // Strong intensifiers
  ['extremely', 2.0], ['absolutely', 2.0], ['completely', 2.0],
  ['totally', 2.0], ['entirely', 2.0], ['utterly', 2.0],
  // Moderate intensifiers
  ['very', 1.5], ['really', 1.5], ['particularly', 1.5],
  ['especially', 1.5], ['notably', 1.5], ['significantly', 1.5],
  // Mild intensifiers
  ['quite', 1.25], ['rather', 1.25], ['somewhat', 1.25],
  ['fairly', 1.25], ['pretty', 1.25], ['relatively', 1.25],
  // Emphatic phrases (see "Known limitations" in the module header)
  ['without a doubt', 2.0], ['beyond question', 2.0],
  ['by far', 1.75], ['to a great extent', 1.75],
];

/** Default words that flip the polarity of the next sentiment word. */
const DEFAULT_NEGATORS = [
  // Direct negators
  'not', 'no', 'never', 'none', 'neither', 'nor', 'nothing',
  // Contracted negators
  "n't", 'cannot', "won't", "wouldn't", "shouldn't", "couldn't",
  "haven't", "hasn't", "didn't", "isn't", "aren't", "weren't",
  // Complex negators
  'hardly', 'scarcely', 'barely', 'rarely', 'seldom', 'few', 'little',
  'nowhere', 'nobody', 'none', 'by no means', 'on no account',
  // Implicit negators
  'deny', 'reject', 'refuse', 'prevent', 'avoid', 'stop', 'exclude',
  'doubt', 'question', 'dispute',
];

/** Emotion taxonomy: category name -> keywords belonging to that category. */
const EMOTION_CATEGORIES = {
  // Positive emotions
  joy: ['happy', 'joy', 'delighted', 'pleased', 'excited', 'ecstatic', 'elated', 'jubilant', 'thrilled', 'overjoyed'],
  gratitude: ['grateful', 'thankful', 'blessed', 'appreciative', 'indebted', 'humbled', 'moved'],
  satisfaction: ['content', 'satisfied', 'fulfilled', 'pleased', 'accomplished', 'proud', 'complete'],
  optimism: ['optimistic', 'hopeful', 'promising', 'confident', 'assured', 'encouraged', 'positive'],
  serenity: ['peaceful', 'calm', 'tranquil', 'relaxed', 'serene', 'composed', 'centered'],
  amusement: ['fun', 'funny', 'amused', 'entertained', 'playful', 'silly', 'humorous', 'laughing'],
  interest: ['curious', 'intrigued', 'fascinated', 'engaged', 'absorbed', 'captivated', 'inspired'],
  admiration: ['impressed', 'awed', 'amazed', 'respected', 'valued', 'esteemed', 'revered'],
  love: ['loving', 'adoring', 'fond', 'affectionate', 'caring', 'cherished', 'devoted'],
  // Negative emotions
  frustration: ['frustrated', 'annoyed', 'irritated', 'agitated', 'exasperated', 'thwarted', 'hindered'],
  concern: ['worried', 'concerned', 'anxious', 'uneasy', 'apprehensive', 'troubled', 'disturbed'],
  disappointment: ['disappointed', 'letdown', 'dissatisfied', 'disheartened', 'dismayed', 'unfulfilled'],
  anger: ['angry', 'furious', 'outraged', 'enraged', 'hostile', 'irate', 'livid', 'incensed'],
  sadness: ['sad', 'unhappy', 'sorrowful', 'depressed', 'melancholy', 'gloomy', 'heartbroken'],
  fear: ['afraid', 'scared', 'fearful', 'terrified', 'panicked', 'petrified', 'dreading'],
  confusion: ['confused', 'puzzled', 'perplexed', 'bewildered', 'disoriented', 'uncertain', 'unclear'],
  regret: ['regretful', 'sorry', 'remorseful', 'guilty', 'apologetic', 'ashamed', 'contrite'],
  // Complex emotions
  anticipation: ['eager', 'anticipating', 'expecting', 'awaiting', 'looking forward', 'preparing'],
  surprise: ['surprised', 'astonished', 'startled', 'shocked', 'stunned', 'unexpected', 'remarkable'],
  nostalgia: ['nostalgic', 'reminiscent', 'remembering', 'longing', 'wistful', 'retrospective'],
  determination: ['determined', 'resolved', 'committed', 'focused', 'dedicated', 'persistent', 'steadfast'],
  relief: ['relieved', 'reassured', 'unburdened', 'comforted', 'calmed', 'settled', 'eased'],
  ambivalence: ['conflicted', 'uncertain', 'unsure', 'mixed feelings', 'undecided', 'torn'],
  // Professional / work-related
  confidence: ['confident', 'capable', 'competent', 'skilled', 'proficient', 'qualified', 'expert'],
  motivation: ['motivated', 'driven', 'inspired', 'energized', 'enthusiastic', 'passionate', 'eager'],
  productivity: ['productive', 'efficient', 'effective', 'accomplished', 'successful', 'achieving'],
  collaboration: ['collaborative', 'cooperative', 'supportive', 'helpful', 'team-oriented', 'united'],
  // Growth / learning
  growth: ['growing', 'developing', 'improving', 'progressing', 'advancing', 'learning', 'evolving'],
  curiosity: ['curious', 'inquisitive', 'interested', 'exploring', 'discovering', 'wondering'],
  insight: ['understanding', 'realizing', 'comprehending', 'grasping', 'enlightened', 'aware'],
};

/**
 * Reverse index keyword -> category, built once so that categorizing a word is
 * a single Map lookup. When a keyword appears in several categories the first
 * category in declaration order wins.
 */
const EMOTION_BY_WORD = new Map();
for (const [category, keywords] of Object.entries(EMOTION_CATEGORIES)) {
  for (const keyword of keywords) {
    if (!EMOTION_BY_WORD.has(keyword)) {
      EMOTION_BY_WORD.set(keyword, category);
    }
  }
}

/** Punctuation removed from text before it is split into tokens. */
const PUNCTUATION = /[.,\/#!?$%\^&\*;:{}=\-_`~()"\[\]\u201C\u201D]/g;

/** CSS selectors tried in order when looking for the main page content. */
const CONTENT_SELECTORS = [
  'article', 'main', '.content', '.post-content', '.entry-content',
  '.article-content', '.blog-post', '.post', 'article p', '.content p', 'p',
];

/**
 * Splits text into lower-case word tokens.
 *
 * Typographic apostrophes are normalized to ASCII so contracted negators such
 * as "won't" match when the page uses curly quotes; apostrophes at token edges
 * (quotation marks) are dropped, and empty tokens are discarded.
 *
 * @param {string} text - Raw text
 * @returns {string[]} Tokens in reading order
 */
const tokenize = (text) =>
  text
    .toLowerCase()
    .replace(/[\u2018\u2019]/g, "'")
    .replace(PUNCTUATION, '')
    .split(/\s+/)
    .map((token) => token.replace(/^'+|'+$/g, ''))
    .filter(Boolean);

/**
 * Maps a numeric score to a sentiment label.
 *
 * @param {number} score - Aggregate sentiment score
 * @returns {string} 'Very Positive' | 'Positive' | 'Neutral' | 'Negative' | 'Very Negative'
 */
const describeSentiment = (score) => {
  if (score >= 5) return 'Very Positive';
  if (score > 0) return 'Positive';
  if (score <= -5) return 'Very Negative';
  if (score < 0) return 'Negative';
  return 'Neutral';
};

/**
 * Derives an intensity label from score magnitude and intensifier usage.
 *
 * @param {number} score - Aggregate sentiment score
 * @param {number} intensifierCount - Number of intensifier tokens seen
 * @returns {string} 'Very Strong' | 'Strong' | 'Moderate' | 'Mild' | 'Neutral'
 */
const describeIntensity = (score, intensifierCount) => {
  const magnitude = Math.abs(score);
  if (magnitude > 10 || intensifierCount > 5) return 'Very Strong';
  if (magnitude > 7 || intensifierCount > 3) return 'Strong';
  if (magnitude > 4 || intensifierCount > 1) return 'Moderate';
  if (magnitude > 0) return 'Mild';
  return 'Neutral';
};

/**
 * Result returned when there is nothing to analyze. It has the same shape as a
 * regular analysis result so consumers do not need to special-case it.
 *
 * @returns {Object} Neutral analysis result
 */
const createEmptyAnalysis = () => ({
  score: 0,
  words: [],
  summary: { positive: 0, negative: 0, neutral: 0, total: 0 },
  sentiment: 'Neutral',
  topEmotions: [],
  intensity: 'None',
  wordCount: 0,
  averageSentiment: 0,
});

/**
 * Core (offline) sentiment analyzer.
 *
 * @param {Object} config - Dictionaries: positiveWords/negativeWords/negators
 *   (Set) and intensifiers (Map of word -> multiplier)
 * @returns {Object} { analyzeText, calculateSentimentScore, getEmotionalTone, getTopWords }
 */
const createSentimentAnalyzer = (config) => {
  /**
   * Scores a piece of text.
   *
   * An intensifier or negator modifies only the next token; the modifier is
   * reset after every other token, whether or not it carried sentiment.
   *
   * @param {string} text - Text to analyze
   * @returns {Object} score, scored words, summary counts, sentiment label,
   *   top emotions, intensity label, wordCount and averageSentiment
   */
  const analyzeText = (text) => {
    if (!text || typeof text !== 'string') {
      console.warn('Invalid input to analyzeText:', text);
      return createEmptyAnalysis();
    }

    const words = tokenize(text);
    const analyzedWords = [];
    const emotionCounts = new Map();
    let score = 0;
    let multiplier = 1;
    let positiveCount = 0;
    let negativeCount = 0;
    let intensifierCount = 0;

    for (const word of words) {
      if (config.intensifiers.has(word)) {
        // Keep the sign set by a preceding negator so "not very good" stays
        // negative instead of having the negation overwritten.
        const sign = multiplier < 0 ? -1 : 1;
        multiplier = sign * config.intensifiers.get(word);
        intensifierCount++;
        continue;
      }

      if (config.negators.has(word)) {
        multiplier *= -1;
        continue;
      }

      let polarity = 0;
      if (config.positiveWords.has(word)) {
        polarity = 1;
        positiveCount++;
      } else if (config.negativeWords.has(word)) {
        polarity = -1;
        negativeCount++;
      }

      if (polarity !== 0) {
        const wordScore = polarity * multiplier;
        const category = EMOTION_BY_WORD.get(word) ?? null;
        score += wordScore;
        if (category) {
          emotionCounts.set(category, (emotionCounts.get(category) || 0) + 1);
        }
        if (wordScore !== 0 || category) {
          analyzedWords.push({ word, score: wordScore, multiplier, category });
        }
      }

      // Modifiers only apply to the token that immediately follows them.
      multiplier = 1;
    }

    const topEmotions = [...emotionCounts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([emotion, count]) => ({ emotion, count }));

    return {
      score,
      words: analyzedWords,
      summary: {
        positive: positiveCount,
        negative: negativeCount,
        // Intensifiers and negators are counted as neutral tokens.
        neutral: words.length - positiveCount - negativeCount,
        total: words.length,
      },
      sentiment: describeSentiment(score),
      topEmotions,
      intensity: describeIntensity(score, intensifierCount),
      wordCount: words.length,
      averageSentiment: words.length > 0 ? score / words.length : 0,
    };
  };

  return {
    analyzeText,
    calculateSentimentScore: (text) => analyzeText(text).score,
    getEmotionalTone: (text) => analyzeText(text).sentiment,
    getTopWords: (text) => analyzeText(text).words,
  };
};

/**
 * Builds a Map from a Map, an iterable of [key, value] pairs or a plain object.
 *
 * @param {Map|Iterable|Object|null|undefined} value - Caller-supplied entries
 * @param {Iterable} fallback - Entries used when value is null/undefined
 * @returns {Map} A new Map (never the caller's instance)
 */
const toMap = (value, fallback) => {
  if (value == null) return new Map(fallback);
  if (typeof value[Symbol.iterator] === 'function') return new Map(value);
  return new Map(Object.entries(value));
};

/**
 * Downloads a page and parses it into a DOM document.
 *
 * @param {string} url - URL to fetch
 * @returns {Promise<Document>} Parsed document
 * @throws {Error} 'Failed to fetch content: …' on network errors or non-2xx responses
 */
const fetchDocument = async (url) => {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      // Do not run sentiment analysis on an error page.
      throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
    }
    const html = await response.text();
    return new JSDOM(html).window.document;
  } catch (error) {
    throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
  }
};

/**
 * Extracts the main readable text from a document.
 *
 * Selectors are tried from most to least specific; the first selector that
 * yields non-empty text wins. Elements inside nav/header/footer are skipped.
 * Falls back to the whole body text when no selector produces anything.
 *
 * @param {Document} doc - DOM document
 * @returns {string} Whitespace-normalized text ('' when the page has no text)
 */
const extractContent = (doc) => {
  let content = '';

  for (const selector of CONTENT_SELECTORS) {
    for (const el of doc.querySelectorAll(selector)) {
      if (el.closest('nav, header, footer')) continue;
      content += `${el.textContent}\n\n`;
    }
    if (content.trim().length > 0) break;
  }

  if (content.trim().length === 0) {
    content = doc.body?.textContent ?? '';
  }

  return content.replace(/\s+/g, ' ').trim();
};

/**
 * Extracts metadata from a document using meta tags (including OpenGraph and
 * article:* properties) with DOM fallbacks for author and date.
 *
 * @param {Document} doc - DOM document
 * @returns {{title: string, description: string, author: string, date: string, keywords: string[]}}
 */
const extractMetadata = (doc) => {
  const metadata = { title: '', description: '', author: '', date: '', keywords: [] };

  for (const tag of doc.querySelectorAll('meta')) {
    const name = tag.getAttribute('name')?.toLowerCase();
    const property = tag.getAttribute('property')?.toLowerCase();
    const content = tag.getAttribute('content');
    if (!content) continue;

    // When several tags supply the same field, the last one in the document wins.
    if (name === 'description' || property === 'og:description') {
      metadata.description = content;
    }
    if (name === 'author' || property === 'article:author') {
      metadata.author = content;
    }
    if (name === 'keywords') {
      metadata.keywords = content.split(',').map((k) => k.trim()).filter(Boolean);
    }
    if (name === 'date' || property === 'article:published_time' || property === 'article:modified_time') {
      metadata.date = content;
    }
  }

  // Title sources in order of preference; trim so a whitespace-only <h1>
  // does not shadow the document title.
  metadata.title =
    doc.querySelector('meta[property="og:title"]')?.getAttribute('content') ||
    doc.querySelector('h1')?.textContent?.trim() ||
    doc.title ||
    '';

  if (!metadata.author) {
    const authorElement = doc.querySelector('[rel="author"], .author, .byline');
    if (authorElement) {
      metadata.author = authorElement.textContent.trim();
    }
  }

  if (!metadata.date) {
    const dateElement = doc.querySelector('time, .date, .published');
    if (dateElement) {
      metadata.date = dateElement.getAttribute('datetime') || dateElement.textContent.trim();
    }
  }

  return metadata;
};

/**
 * Creates a web-enabled sentiment analyzer.
 *
 * @param {Object} [config] - Optional dictionary overrides. positiveWords,
 *   negativeWords and negators accept any iterable of strings; intensifiers
 *   accepts a Map, an iterable of [word, multiplier] pairs or a plain object.
 *   Supplied collections are copied, so later add* calls never mutate them.
 * @returns {Object} Analyzer with analyzeText, analyzeUrl, addPositiveWords,
 *   addNegativeWords, addIntensifier, addNegator, getConfig, getDictionaries
 *
 * @example
 * const analyzer = createWebSentimentAnalyzer();
 * analyzer.addPositiveWords(['groundbreaking']);
 * analyzer.addIntensifier('incredibly', 1.8);
 * analyzer.addNegator('lacks');
 * console.log(analyzer.getDictionaries());
 */
const createWebSentimentAnalyzer = (config = {}) => {
  const finalConfig = {
    ...config,
    positiveWords: new Set(config.positiveWords ?? DEFAULT_POSITIVE_WORDS),
    negativeWords: new Set(config.negativeWords ?? DEFAULT_NEGATIVE_WORDS),
    intensifiers: toMap(config.intensifiers, DEFAULT_INTENSIFIERS),
    negators: new Set(config.negators ?? DEFAULT_NEGATORS),
  };

  const { analyzeText } = createSentimentAnalyzer(finalConfig);

  // Tokens are lower-cased before lookup, so dictionary entries must be too.
  const normalize = (word) => String(word).toLowerCase();

  /**
   * Fetches a page once and analyzes both its content and its metadata.
   *
   * @param {string} url - URL to analyze
   * @returns {Promise<Object>} Analysis result extended with url, metadata and fetchDate
   * @throws {Error} 'Analysis failed: …' with the underlying error as `cause`
   */
  const analyzeUrl = async (url) => {
    try {
      const doc = await fetchDocument(url);
      const content = extractContent(doc);
      const details = {
        url,
        metadata: extractMetadata(doc),
        fetchDate: new Date().toISOString(),
      };

      if (!content) {
        console.warn(`No content found for URL: ${url}`);
        return { ...createEmptyAnalysis(), ...details };
      }

      return { ...analyzeText(content), ...details };
    } catch (error) {
      throw new Error(`Analysis failed: ${error.message}`, { cause: error });
    }
  };

  return {
    analyzeText,
    analyzeUrl,
    addPositiveWords: (words) => words.forEach((word) => finalConfig.positiveWords.add(normalize(word))),
    addNegativeWords: (words) => words.forEach((word) => finalConfig.negativeWords.add(normalize(word))),
    addIntensifier: (word, multiplier) => finalConfig.intensifiers.set(normalize(word), multiplier),
    addNegator: (word) => finalConfig.negators.add(normalize(word)),
    // Return copies so callers cannot mutate the analyzer's dictionaries.
    getConfig: () => ({
      ...finalConfig,
      positiveWords: new Set(finalConfig.positiveWords),
      negativeWords: new Set(finalConfig.negativeWords),
      intensifiers: new Map(finalConfig.intensifiers),
      negators: new Set(finalConfig.negators),
    }),
    getDictionaries: () => ({
      positiveCount: finalConfig.positiveWords.size,
      negativeCount: finalConfig.negativeWords.size,
      intensifierCount: finalConfig.intensifiers.size,
      negatorCount: finalConfig.negators.size,
    }),
  };
};

// Shared analyzer instance used by the CLI helpers below.
const analyzer = createWebSentimentAnalyzer();

/**
 * Creates a fixed-width text visualization of a sentiment score.
 *
 * The score is clamped to [-10, 10] for positioning only; the printed value is
 * the unclamped score.
 *
 * @param {number} score - Sentiment score to visualize
 * @returns {string} Multi-line scale with a marker at the score position
 */
const createSentimentScale = (score) => {
  const width = 40;
  const middle = Math.floor(width / 2);
  const clampedScore = Math.max(-10, Math.min(10, score));
  // +10 maps to `width`, which is one past the last cell; clamp so the marker
  // is still drawn at the right edge.
  const position = Math.min(width - 1, Math.round(middle + (clampedScore * middle) / 10));

  let scale = '';
  for (let i = 0; i < width; i++) {
    // The marker takes precedence so a score of exactly 0 remains visible.
    if (i === position) scale += '●';
    else if (i === middle) scale += '│';
    else scale += '─';
  }

  const pad = ' '.repeat(Math.max(0, middle - 5));
  return `
NEGATIVE ${pad}NEUTRAL${pad} POSITIVE
[-10] ${scale} [+10]
Score: ${score.toFixed(2)}
`;
};

/**
 * Formats analysis results as a human-readable report.
 *
 * @param {Object} analysis - Result of analyzeUrl (or analyzeText; missing
 *   metadata/url are tolerated)
 * @returns {string} Formatted report
 */
const formatAnalysisResults = (analysis) => {
  const {
    score,
    summary,
    sentiment,
    topEmotions,
    intensity,
    wordCount,
    words,
    metadata = {},
    url = '',
  } = analysis;

  const emotionSection = topEmotions.length
    ? `Dominant Emotions:\n${topEmotions
        .map((e) => `• ${e.emotion} (mentioned ${e.count} time${e.count > 1 ? 's' : ''})`)
        .join('\n')}\n`
    : '';

  const descriptionLine = metadata.description ? `• Description: ${metadata.description}\n` : '';

  // The first five scored words in reading order.
  const notableWords = words
    .filter((w) => w.score !== 0)
    .slice(0, 5)
    .map((w) => `• "${w.word}" (${w.score > 0 ? 'positive' : 'negative'}, ${w.category || 'general'})`)
    .join('\n');

  return `
=== Sentiment Analysis for ${metadata.title || url} ===
${createSentimentScale(score)}
Overall Assessment:
• Sentiment: ${sentiment} (${intensity})
• Total Words Analyzed: ${wordCount}

Word Breakdown:
• Positive Words: ${summary.positive}
• Negative Words: ${summary.negative}
• Neutral Words: ${summary.neutral}

${emotionSection}
Content Details:
• Author: ${metadata.author || 'Not specified'}
• Date: ${metadata.date || 'Not specified'}
${descriptionLine}
Notable Words:
${notableWords}

${'-'.repeat(60)}
`;
};

/**
 * Analyzes one URL and prints the report. Failures are reported on stderr and
 * never thrown, so one bad URL does not abort a batch.
 *
 * @param {string} url - URL to analyze
 * @returns {Promise<void>}
 */
const analyzeWebPage = async (url) => {
  try {
    const analysis = await analyzer.analyzeUrl(url);
    console.log(formatAnalysisResults(analysis));
  } catch (error) {
    console.error(`\n❌ Analysis failed for ${url}:`, error.message);
  }
};

const helpText = `
Sentiment Analyzer
=================

Analyzes the sentiment of web pages and provides detailed emotional analysis.

Usage:
  bun run app.js <url>
  bun run app.js <url1> <url2> ...

Example:
  bun run app.js https://example.com/blog-post
  bun run app.js https://blog1.com https://blog2.com

Options:
  --help, -h    Show this help message
`;

/**
 * CLI entry point: prints help when asked (or when no arguments are given),
 * otherwise analyzes every non-flag argument as a URL, one after another.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  // The first two argv entries are the runtime and the script path.
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
    console.log(helpText);
    return;
  }

  const urls = args.filter((arg) => !arg.startsWith('-'));
  for (const url of urls) {
    // analyzeWebPage reports its own failures, so the loop keeps going.
    await analyzeWebPage(url);
  }
};

// Run the program
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});