/**
 * Web Sentiment Analyzer
 *
 * Analyzes the sentiment of web content using a combination of:
 * - Dictionary-based sentiment analysis
 * - Emotion categorization
 * - Intensity analysis
 * - Web content extraction
 * - Metadata parsing
 *
 * Architecture overview:
 * - `createWebSentimentAnalyzer` is a factory returning an analyzer instance.
 * - Dictionaries are configurable at creation time and extendable afterwards.
 * - Content fetching, analysis and display are kept in separate functions.
 * - A small CLI (`main`) processes every URL given on the command line.
 *
 * `jsdom` is loaded lazily (see `fetchDocument`) so that `--help` and pure
 * text analysis do not pay for, or depend on, the DOM implementation.
 *
 * @module sentiment-analyzer
 */

/** Base magnitude contributed by a single positive or negative word. */
const BASE_WORD_SCORE = 2;

/** Final scores are clamped to [-SCORE_LIMIT, SCORE_LIMIT]. */
const SCORE_LIMIT = 10;

/**
 * Emotion taxonomy. Weights are signed: positive emotions add to the score,
 * negative emotions subtract from it.
 */
const EMOTION_CATEGORIES = {
  // Strong positive emotions
  joy: { weight: 2.0, words: ['happy', 'joy', 'delighted', 'pleased', 'excited', 'ecstatic'] },
  love: { weight: 2.0, words: ['loving', 'adoring', 'fond', 'affectionate', 'caring'] },
  // Moderate positive emotions
  satisfaction: { weight: 1.5, words: ['content', 'satisfied', 'fulfilled', 'pleased', 'accomplished'] },
  // Strong negative emotions
  anger: { weight: -2.0, words: ['angry', 'furious', 'outraged', 'enraged', 'hostile'] },
  hate: { weight: -2.0, words: ['hate', 'despise', 'loathe', 'detest', 'abhor'] },
  // Moderate negative emotions
  frustration: { weight: -1.5, words: ['frustrated', 'annoyed', 'irritated', 'agitated'] },
};

/**
 * Word -> { category, weight } lookup built once from EMOTION_CATEGORIES.
 * When a word is listed in several categories the first listed category wins,
 * which matches a first-match scan over the categories.
 */
const EMOTION_BY_WORD = new Map();
for (const [category, { weight, words }] of Object.entries(EMOTION_CATEGORIES)) {
  for (const word of words) {
    if (!EMOTION_BY_WORD.has(word)) {
      EMOTION_BY_WORD.set(word, { category, weight });
    }
  }
}

/** Selectors tried in order when looking for the main content of a page. */
const CONTENT_SELECTORS = [
  'article',
  'main',
  '.content',
  '.post-content',
  '.entry-content',
  '.article-content',
  '.blog-post',
  '.post',
  'article p',
  '.content p',
  'p',
];

/**
 * Builds the result returned when there is nothing to analyze.
 * It carries the same keys as a regular analysis so consumers can treat both
 * uniformly.
 *
 * @returns {Object} Neutral analysis result
 */
const createEmptyAnalysis = () => ({
  score: 0,
  words: [],
  summary: { positive: 0, negative: 0, neutral: 0, sentiment_words: 0, total: 0 },
  sentiment: 'Neutral',
  topEmotions: [],
  intensity: 'None',
  wordCount: 0,
  averageSentiment: 0,
});

/**
 * Maps a (clamped) sentiment score to a tone label.
 *
 * @param {number} score - Sentiment score
 * @returns {string} 'Very Positive' | 'Positive' | 'Neutral' | 'Negative' | 'Very Negative'
 */
const toneForScore = (score) => {
  if (score >= 5) return 'Very Positive';
  if (score > 0) return 'Positive';
  if (score <= -5) return 'Very Negative';
  if (score < 0) return 'Negative';
  return 'Neutral';
};

/**
 * Derives an intensity label from score magnitude and intensifier usage.
 *
 * @param {number} score - Sentiment score
 * @param {number} intensifierCount - Number of intensifiers found in the text
 * @returns {string} Intensity label
 */
const intensityForScore = (score, intensifierCount) => {
  const magnitude = Math.abs(score);
  if (magnitude > 10 || intensifierCount > 5) return 'Very Strong';
  if (magnitude > 7 || intensifierCount > 3) return 'Strong';
  if (magnitude > 4 || intensifierCount > 1) return 'Moderate';
  if (magnitude > 0) return 'Mild';
  return 'Neutral';
};

/**
 * Splits text into lowercase tokens with punctuation removed.
 * Apostrophes are kept (curly ones are normalized) so contractions such as
 * "won't" still match the negator dictionary; empty tokens are dropped so they
 * do not inflate the word count.
 *
 * @param {string} text - Raw text
 * @returns {string[]} Tokens
 */
const tokenize = (text) =>
  text
    .toLowerCase()
    .replace(/[\u2018\u2019]/g, "'")
    .replace(/[.,\/#!?$%^&*;:{}=\-_`~()"\[\]<>|+@\u201C\u201D]/g, '')
    .split(/\s+/)
    .filter(Boolean);

/**
 * Core sentiment analyzer implementation.
 *
 * @param {Object} config - Dictionaries: positiveWords/negativeWords/negators
 *   (Set) and intensifiers (Map of word -> multiplier). The dictionaries are
 *   read on every call, so later additions take effect immediately.
 * @returns {Object} Methods for text analysis
 */
const createSentimentAnalyzer = (config) => {
  /** Length in words of the longest intensifier/negator entry (at least 1). */
  const longestModifierLength = () => {
    let longest = 1;
    for (const key of [...config.intensifiers.keys(), ...config.negators]) {
      longest = Math.max(longest, String(key).split(/\s+/).length);
    }
    return longest;
  };

  /**
   * Finds the longest intensifier or negator phrase starting at `start`.
   * Intensifiers take precedence over negators of the same length.
   *
   * @returns {{factor: number, length: number, isIntensifier: boolean}|null}
   */
  const matchModifier = (tokens, start, maxLength) => {
    const limit = Math.min(maxLength, tokens.length - start);
    for (let length = limit; length >= 1; length--) {
      const phrase = tokens.slice(start, start + length).join(' ');
      if (config.intensifiers.has(phrase)) {
        return { factor: config.intensifiers.get(phrase), length, isIntensifier: true };
      }
      if (config.negators.has(phrase)) {
        return { factor: -1, length, isIntensifier: false };
      }
    }
    // The generic "n't" entry covers contractions that are not listed
    // individually (e.g. "don't", "wasn't").
    if (tokens[start].endsWith("n't") && config.negators.has("n't")) {
      return { factor: -1, length: 1, isIntensifier: false };
    }
    return null;
  };

  /**
   * Main text analysis function.
   *
   * Intensifiers and negators modify the next scored word; their effects
   * multiply, so "not very happy" is negative. The modifier is reset after
   * every other word.
   *
   * @param {string} text - Text to analyze
   * @returns {Object} Analysis: score (clamped to [-10, 10]), words, summary,
   *   sentiment, topEmotions, intensity, wordCount, averageSentiment
   */
  const analyzeText = (text) => {
    if (!text || typeof text !== 'string') {
      console.warn('Invalid input to analyzeText:', text);
      return createEmptyAnalysis();
    }

    const tokens = tokenize(text);
    const maxModifierLength = longestModifierLength();
    const analyzedWords = [];
    const emotionCounts = new Map();
    let score = 0;
    let multiplier = 1;
    let positiveCount = 0;
    let negativeCount = 0;
    let intensifierCount = 0;

    for (let i = 0; i < tokens.length; i++) {
      const modifier = matchModifier(tokens, i, maxModifierLength);
      if (modifier) {
        multiplier *= modifier.factor;
        if (modifier.isIntensifier) intensifierCount++;
        i += modifier.length - 1; // skip the remaining words of a phrase
        continue;
      }

      const word = tokens[i];
      const wordImpact = { word, score: 0, multiplier, category: null };

      let polarity = 0;
      if (config.positiveWords.has(word)) {
        polarity = 1;
        positiveCount++;
      } else if (config.negativeWords.has(word)) {
        polarity = -1;
        negativeCount++;
      }

      if (polarity !== 0) {
        const wordScore = polarity * BASE_WORD_SCORE * multiplier;
        score += wordScore;
        wordImpact.score = wordScore;

        const emotion = EMOTION_BY_WORD.get(word);
        if (emotion) {
          wordImpact.category = emotion.category;
          emotionCounts.set(emotion.category, (emotionCounts.get(emotion.category) || 0) + 1);
          // Weights are already signed, so the bonus is always added.
          score += emotion.weight * multiplier;
        }
      }

      if (wordImpact.score !== 0 || wordImpact.category) {
        analyzedWords.push(wordImpact);
      }

      // A modifier only applies to the word that directly follows it.
      multiplier = 1;
    }

    const topEmotions = [...emotionCounts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([emotion, count]) => ({ emotion, count }));

    const clampedScore = Math.max(-SCORE_LIMIT, Math.min(SCORE_LIMIT, score));
    const sentimentWords = positiveCount + negativeCount;

    return {
      score: clampedScore,
      words: analyzedWords,
      summary: {
        positive: positiveCount,
        negative: negativeCount,
        neutral: tokens.length - sentimentWords,
        sentiment_words: sentimentWords,
        total: tokens.length,
      },
      sentiment: toneForScore(clampedScore),
      topEmotions,
      intensity: intensityForScore(clampedScore, intensifierCount),
      wordCount: tokens.length,
      averageSentiment: sentimentWords > 0 ? clampedScore / sentimentWords : 0,
    };
  };

  return {
    analyzeText,
    calculateSentimentScore: (text) => analyzeText(text).score,
    getEmotionalTone: (text) => analyzeText(text).sentiment,
    getTopWords: (text) => analyzeText(text).words,
  };
};

/**
 * Creates a web-enabled sentiment analyzer.
 *
 * @param {Object} config - Optional dictionaries overriding the defaults
 *   (positiveWords, negativeWords, negators: Set; intensifiers: Map)
 * @returns {Object} Analyzer with analyzeText, analyzeUrl, add* helpers,
 *   getConfig and getDictionaries
 */
const createWebSentimentAnalyzer = (config = {}) => {
  const defaultConfig = {
    positiveWords: new Set([
      // Emotional positives
      'love', 'joy', 'happy', 'excited', 'peaceful', 'wonderful', 'fantastic',
      'delighted', 'pleased', 'glad', 'cheerful', 'content', 'satisfied',
      'grateful', 'thankful', 'blessed', 'optimistic', 'hopeful',
      // Quality positives
      'excellent', 'outstanding', 'superb', 'magnificent', 'brilliant',
      'exceptional', 'perfect', 'remarkable', 'spectacular', 'impressive',
      'incredible', 'amazing', 'extraordinary', 'marvelous', 'wonderful',
      // Performance positives
      'efficient', 'effective', 'reliable', 'innovative', 'productive',
      'successful', 'accomplished', 'achieved', 'improved', 'enhanced',
      'optimized', 'streamlined', 'breakthrough', 'revolutionary',
      // Relationship positives
      'friendly', 'helpful', 'supportive', 'kind', 'generous', 'caring',
      'compassionate', 'thoughtful', 'considerate', 'engaging', 'collaborative',
      // Experience positives
      'enjoyable', 'fun', 'entertaining', 'engaging', 'interesting',
      'fascinating', 'captivating', 'inspiring', 'motivating', 'enriching',
      'rewarding',
      // Growth positives
      'growing', 'improving', 'developing', 'advancing', 'progressing',
      'evolving', 'flourishing', 'thriving', 'prospering', 'succeeding',
    ]),
    negativeWords: new Set([
      // Emotional negatives
      'hate', 'angry', 'sad', 'upset', 'frustrated', 'disappointed', 'anxious',
      'worried', 'stressed', 'depressed', 'miserable', 'unhappy', 'distressed',
      'irritated', 'annoyed', 'furious', 'outraged', 'bitter',
      // Quality negatives
      'poor', 'bad', 'terrible', 'horrible', 'awful', 'dreadful', 'inferior',
      'mediocre', 'subpar', 'unacceptable', 'disappointing', 'inadequate',
      'deficient', 'flawed', 'defective',
      // Performance negatives
      'inefficient', 'ineffective', 'unreliable', 'problematic', 'failing',
      'broken', 'malfunctioning', 'corrupted', 'crashed', 'buggy', 'error',
      'failed', 'unsuccessful', 'unproductive',
      // Relationship negatives
      'hostile', 'unfriendly', 'unhelpful', 'rude', 'mean', 'cruel', 'harsh',
      'inconsiderate', 'selfish', 'aggressive', 'confrontational', 'toxic',
      // Experience negatives
      'boring', 'dull', 'tedious', 'monotonous', 'uninteresting', 'tiresome',
      'exhausting', 'frustrating', 'confusing', 'complicated', 'difficult',
      // Decline negatives
      'declining', 'deteriorating', 'worsening', 'failing', 'regressing',
      'degrading', 'diminishing', 'decreasing', 'falling', 'shrinking',
    ]),
    intensifiers: new Map([
      // Strong intensifiers
      ['extremely', 2.0], ['absolutely', 2.0], ['completely', 2.0],
      ['totally', 2.0], ['entirely', 2.0], ['utterly', 2.0],
      // Moderate intensifiers
      ['very', 1.5], ['really', 1.5], ['particularly', 1.5],
      ['especially', 1.5], ['notably', 1.5], ['significantly', 1.5],
      // Mild intensifiers
      ['quite', 1.25], ['rather', 1.25], ['somewhat', 1.25],
      ['fairly', 1.25], ['pretty', 1.25], ['relatively', 1.25],
      // Emphatic phrases
      ['without a doubt', 2.0], ['beyond question', 2.0],
      ['by far', 1.75], ['to a great extent', 1.75],
    ]),
    negators: new Set([
      // Direct negators
      'not', 'no', 'never', 'none', 'neither', 'nor', 'nothing',
      // Contracted negators
      "n't", 'cannot', "won't", "wouldn't", "shouldn't", "couldn't",
      "haven't", "hasn't", "didn't", "isn't", "aren't", "weren't",
      // Complex negators
      'hardly', 'scarcely', 'barely', 'rarely', 'seldom', 'few', 'little',
      'nowhere', 'nobody', 'none', 'by no means', 'on no account',
      // Implicit negators
      'deny', 'reject', 'refuse', 'prevent', 'avoid', 'stop', 'exclude',
      'doubt', 'question', 'dispute',
    ]),
  };

  // Caller-supplied dictionaries replace the corresponding defaults wholesale.
  const finalConfig = { ...defaultConfig, ...config };

  const { analyzeText } = createSentimentAnalyzer(finalConfig);

  /**
   * Downloads a page and parses it into a DOM document.
   *
   * @param {string} url - Page URL
   * @returns {Promise<Document>} Parsed document
   * @throws {Error} On network failure or a non-2xx HTTP status
   */
  const fetchDocument = async (url) => {
    try {
      const { JSDOM } = await import('jsdom');
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }
      const html = await response.text();
      return new JSDOM(html).window.document;
    } catch (error) {
      throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
    }
  };

  /**
   * Extracts the main readable text of a document.
   * Selectors are tried from most to least specific; elements inside
   * nav/header/footer are skipped. Falls back to the whole body text.
   *
   * @param {Document} doc - DOM document
   * @returns {string} Whitespace-normalized text ('' when nothing was found)
   */
  const extractContent = (doc) => {
    let content = '';
    for (const selector of CONTENT_SELECTORS) {
      for (const element of doc.querySelectorAll(selector)) {
        if (element.closest('nav, header, footer')) continue;
        content += `${element.textContent}\n\n`;
      }
      if (content.trim().length > 0) break;
    }

    if (!content.trim()) {
      content = doc.body?.textContent ?? '';
    }

    return content.replace(/\s+/g, ' ').trim();
  };

  /**
   * Extracts metadata (title, description, author, date, keywords) from meta
   * tags, OpenGraph properties and common markup conventions.
   *
   * @param {Document} doc - DOM document
   * @returns {Object} Extracted metadata; missing fields are '' or []
   */
  const extractMetadata = (doc) => {
    const metadata = { title: '', description: '', author: '', date: '', keywords: [] };

    for (const tag of doc.querySelectorAll('meta')) {
      const name = tag.getAttribute('name')?.toLowerCase();
      const property = tag.getAttribute('property')?.toLowerCase();
      const content = tag.getAttribute('content');
      if (!content) continue;

      if (name === 'description' || property === 'og:description') {
        metadata.description = content;
      }
      if (name === 'author' || property === 'article:author') {
        metadata.author = content;
      }
      if (name === 'keywords') {
        metadata.keywords = content.split(',').map((keyword) => keyword.trim()).filter(Boolean);
      }
      if (
        name === 'date' ||
        property === 'article:published_time' ||
        property === 'article:modified_time'
      ) {
        metadata.date = content;
      }
    }

    // Title sources in order of preference.
    metadata.title =
      doc.querySelector('meta[property="og:title"]')?.getAttribute('content') ||
      doc.querySelector('h1')?.textContent.trim() ||
      doc.title ||
      '';

    if (!metadata.author) {
      const authorElement = doc.querySelector('[rel="author"], .author, .byline');
      if (authorElement) {
        metadata.author = authorElement.textContent.trim();
      }
    }

    if (!metadata.date) {
      const dateElement = doc.querySelector('time, .date, .published');
      if (dateElement) {
        metadata.date = dateElement.getAttribute('datetime') || dateElement.textContent.trim();
      }
    }

    return metadata;
  };

  /**
   * Analyzes the sentiment of a web page. The page is downloaded once and
   * used for both content and metadata extraction.
   *
   * @param {string} url - URL to analyze
   * @returns {Promise<Object>} Analysis result plus url, metadata, fetchDate
   * @throws {Error} 'Analysis failed: …' with the underlying error as `cause`
   */
  const analyzeUrl = async (url) => {
    try {
      const doc = await fetchDocument(url);
      const content = extractContent(doc);
      const pageDetails = {
        url,
        metadata: extractMetadata(doc),
        fetchDate: new Date().toISOString(),
      };

      if (!content) {
        console.warn(`No content found for URL: ${url}`);
        return { ...createEmptyAnalysis(), ...pageDetails };
      }

      return { ...analyzeText(content), ...pageDetails };
    } catch (error) {
      throw new Error(`Analysis failed: ${error.message}`, { cause: error });
    }
  };

  return {
    analyzeText,
    analyzeUrl,
    addPositiveWords: (words) => words.forEach((word) => finalConfig.positiveWords.add(word)),
    addNegativeWords: (words) => words.forEach((word) => finalConfig.negativeWords.add(word)),
    addIntensifier: (word, multiplier) => finalConfig.intensifiers.set(word, multiplier),
    addNegator: (word) => finalConfig.negators.add(word),
    getConfig: () => ({ ...finalConfig }),
    getDictionaries: () => ({
      positiveCount: finalConfig.positiveWords.size,
      negativeCount: finalConfig.negativeWords.size,
      intensifierCount: finalConfig.intensifiers.size,
      negatorCount: finalConfig.negators.size,
    }),
  };
};

// Shared analyzer instance used by the CLI.
//
// Customization examples:
//   analyzer.addPositiveWords(['groundbreaking']);
//   analyzer.addNegativeWords(['concerning', 'questionable']);
//   analyzer.addIntensifier('incredibly', 1.8);
//   analyzer.addNegator('lacks');
//   console.log(analyzer.getDictionaries());
const analyzer = createWebSentimentAnalyzer();

/**
 * Creates a fixed-width text scale with a marker at the score's position.
 *
 * @param {number} score - Sentiment score; clamped to [-10, 10] for placement
 * @returns {string} Multi-line visualization ending with the numeric score
 */
const createSentimentScale = (score) => {
  const width = 40;
  const middle = Math.floor(width / 2);
  const value = Number.isFinite(score) ? score : 0;
  const clampedScore = Math.max(-SCORE_LIMIT, Math.min(SCORE_LIMIT, value));

  // Keep the marker inside the scale: +10 would otherwise land one past the end.
  const position = Math.min(
    width - 1,
    Math.max(0, Math.round(middle + (clampedScore * middle) / SCORE_LIMIT)),
  );

  let scale = '';
  for (let i = 0; i < width; i++) {
    // The marker wins over the centre bar so a neutral score is still shown.
    if (i === position) scale += '●';
    else if (i === middle) scale += '│';
    else scale += '─';
  }

  const labelGap = ' '.repeat(middle - 5);
  return [
    '',
    `NEGATIVE ${labelGap}NEUTRAL${labelGap} POSITIVE`,
    `[-10] ${scale} [+10]`,
    `Score: ${value.toFixed(2)}`,
    '',
  ].join('\n');
};

/**
 * Formats analysis results as a human-readable report.
 * Works for both `analyzeUrl` results and plain `analyzeText` results (which
 * carry no url/metadata).
 *
 * @param {Object} analysis - Analysis results to format
 * @returns {string} Formatted report
 */
const formatAnalysisResults = (analysis) => {
  const {
    score = 0,
    summary = {},
    sentiment,
    topEmotions = [],
    intensity,
    wordCount = 0,
    words = [],
    metadata = {},
    url,
  } = analysis;

  const positive = summary.positive ?? 0;
  const negative = summary.negative ?? 0;
  const sentimentWords = summary.sentiment_words ?? positive + negative;
  const total = summary.total ?? wordCount;

  const lines = [
    '',
    `=== Sentiment Analysis for ${metadata.title || url || 'text input'} ===`,
    createSentimentScale(score),
    'Overall Assessment:',
    `• Sentiment: ${sentiment} (${intensity})`,
    `• Total Words Analyzed: ${wordCount}`,
    '',
    'Word Breakdown:',
    `• Positive Words: ${positive}`,
    `• Negative Words: ${negative}`,
    `• Sentiment-Carrying Words: ${sentimentWords} (of ${total} total)`,
    '',
  ];

  if (topEmotions.length > 0) {
    lines.push('Dominant Emotions:');
    for (const { emotion, count } of topEmotions) {
      lines.push(`• ${emotion} (mentioned ${count} time${count > 1 ? 's' : ''})`);
    }
    lines.push('');
  }

  lines.push(
    'Content Details:',
    `• Author: ${metadata.author || 'Not specified'}`,
    `• Date: ${metadata.date || 'Not specified'}`,
  );
  if (metadata.description) {
    lines.push(`• Description: ${metadata.description}`);
  }

  lines.push('', 'Notable Words:');
  for (const entry of words.filter((w) => w.score !== 0).slice(0, 5)) {
    const polarity = entry.score > 0 ? 'positive' : 'negative';
    lines.push(`• "${entry.word}" (${polarity}, ${entry.category || 'general'})`);
  }

  lines.push('', '-'.repeat(60), '');
  return lines.join('\n');
};

/**
 * Analyzes one URL and prints the report; failures are reported, not thrown,
 * so one bad URL does not stop a batch.
 *
 * @param {string} url - URL to analyze
 * @returns {Promise<void>}
 */
const analyzeWebPage = async (url) => {
  try {
    const analysis = await analyzer.analyzeUrl(url);
    console.log(formatAnalysisResults(analysis));
  } catch (error) {
    console.error(`\n❌ Analysis failed for ${url}:`, error.message);
  }
};

const helpText = `
Sentiment Analyzer
=================
Analyzes the sentiment of web pages and provides detailed emotional analysis.

Usage:
  bun run app.js <url>
  bun run app.js <url1> <url2> ...

Example:
  bun run app.js https://example.com/blog-post
  bun run app.js https://blog1.com https://blog2.com

Options:
  --help, -h    Show this help message
`;

/**
 * CLI entry point: prints help, or analyzes each URL argument in order.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  // Skip the runtime executable and the script path.
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
    console.log(helpText);
    return;
  }

  for (const url of args) {
    // Ignore anything that looks like an option flag.
    if (url.startsWith('-')) continue;
    // Sequential on purpose: reports are printed in argument order.
    await analyzeWebPage(url);
  }
};

main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});