diff options
Diffstat (limited to 'js/sentiment/app.js')
-rw-r--r-- | js/sentiment/app.js | 757 |
1 files changed, 757 insertions, 0 deletions
/**
 * Web Sentiment Analyzer
 *
 * Analyzes the sentiment of web content using a combination of:
 * - Dictionary-based sentiment analysis
 * - Emotion categorization
 * - Intensity analysis
 * - Web content extraction
 * - Metadata parsing
 *
 * Architecture overview:
 * - `createWebSentimentAnalyzer` is a factory returning an analyzer instance.
 * - Dictionaries can be overridden at creation time or extended afterwards.
 * - Content fetching, analysis and display are kept in separate functions.
 * - A small CLI (`main`) processes one or more URLs.
 *
 * Note: `jsdom` is loaded lazily (see `loadDocument`) so that `--help` and
 * text-only analysis do not pay for, or depend on, the DOM implementation.
 *
 * @module sentiment-analyzer
 */

/** Punctuation removed from text before it is split into word tokens. */
const PUNCTUATION_PATTERN = /[.,\/#!$%^&*;:{}=\-_`~()?"\u201C\u201D\[\]]/g;

/** Typographic apostrophes, normalized to `'` so contractions match the negator list. */
const CURLY_APOSTROPHE_PATTERN = /[\u2018\u2019]/g;

/**
 * Selectors tried in order when looking for the main text of a page; the
 * first selector that yields non-empty text wins.
 */
const CONTENT_SELECTORS = [
  'article',
  'main',
  '.content',
  '.post-content',
  '.entry-content',
  '.article-content',
  '.blog-post',
  '.post',
  'article p',
  '.content p',
  'p',
];

/**
 * Emotion taxonomy used to label sentiment-bearing words.
 * A word listed in several categories is assigned to the first one declared.
 */
const EMOTION_CATEGORIES = {
  // Positive emotions
  joy: ['happy', 'joy', 'delighted', 'pleased', 'excited', 'ecstatic', 'elated', 'jubilant', 'thrilled', 'overjoyed'],
  gratitude: ['grateful', 'thankful', 'blessed', 'appreciative', 'indebted', 'humbled', 'moved'],
  satisfaction: ['content', 'satisfied', 'fulfilled', 'pleased', 'accomplished', 'proud', 'complete'],
  optimism: ['optimistic', 'hopeful', 'promising', 'confident', 'assured', 'encouraged', 'positive'],
  serenity: ['peaceful', 'calm', 'tranquil', 'relaxed', 'serene', 'composed', 'centered'],
  amusement: ['fun', 'funny', 'amused', 'entertained', 'playful', 'silly', 'humorous', 'laughing'],
  interest: ['curious', 'intrigued', 'fascinated', 'engaged', 'absorbed', 'captivated', 'inspired'],
  admiration: ['impressed', 'awed', 'amazed', 'respected', 'valued', 'esteemed', 'revered'],
  love: ['loving', 'adoring', 'fond', 'affectionate', 'caring', 'cherished', 'devoted'],

  // Negative emotions
  frustration: ['frustrated', 'annoyed', 'irritated', 'agitated', 'exasperated', 'thwarted', 'hindered'],
  concern: ['worried', 'concerned', 'anxious', 'uneasy', 'apprehensive', 'troubled', 'disturbed'],
  disappointment: ['disappointed', 'letdown', 'dissatisfied', 'disheartened', 'dismayed', 'unfulfilled'],
  anger: ['angry', 'furious', 'outraged', 'enraged', 'hostile', 'irate', 'livid', 'incensed'],
  sadness: ['sad', 'unhappy', 'sorrowful', 'depressed', 'melancholy', 'gloomy', 'heartbroken'],
  fear: ['afraid', 'scared', 'fearful', 'terrified', 'panicked', 'petrified', 'dreading'],
  confusion: ['confused', 'puzzled', 'perplexed', 'bewildered', 'disoriented', 'uncertain', 'unclear'],
  regret: ['regretful', 'sorry', 'remorseful', 'guilty', 'apologetic', 'ashamed', 'contrite'],

  // Complex emotions
  anticipation: ['eager', 'anticipating', 'expecting', 'awaiting', 'looking forward', 'preparing'],
  surprise: ['surprised', 'astonished', 'startled', 'shocked', 'stunned', 'unexpected', 'remarkable'],
  nostalgia: ['nostalgic', 'reminiscent', 'remembering', 'longing', 'wistful', 'retrospective'],
  determination: ['determined', 'resolved', 'committed', 'focused', 'dedicated', 'persistent', 'steadfast'],
  relief: ['relieved', 'reassured', 'unburdened', 'comforted', 'calmed', 'settled', 'eased'],
  ambivalence: ['conflicted', 'uncertain', 'unsure', 'mixed feelings', 'undecided', 'torn'],

  // Professional / work-related
  confidence: ['confident', 'capable', 'competent', 'skilled', 'proficient', 'qualified', 'expert'],
  motivation: ['motivated', 'driven', 'inspired', 'energized', 'enthusiastic', 'passionate', 'eager'],
  productivity: ['productive', 'efficient', 'effective', 'accomplished', 'successful', 'achieving'],
  collaboration: ['collaborative', 'cooperative', 'supportive', 'helpful', 'team-oriented', 'united'],

  // Growth / learning
  growth: ['growing', 'developing', 'improving', 'progressing', 'advancing', 'learning', 'evolving'],
  curiosity: ['curious', 'inquisitive', 'interested', 'exploring', 'discovering', 'wondering'],
  insight: ['understanding', 'realizing', 'comprehending', 'grasping', 'enlightened', 'aware'],
};

/**
 * Reverse index word -> emotion category, built once instead of scanning every
 * category list for every word. Insertion keeps the first category that lists
 * a word, matching a first-match scan over `EMOTION_CATEGORIES`.
 */
const EMOTION_BY_WORD = new Map();
for (const [category, keywords] of Object.entries(EMOTION_CATEGORIES)) {
  for (const keyword of keywords) {
    if (!EMOTION_BY_WORD.has(keyword)) {
      EMOTION_BY_WORD.set(keyword, category);
    }
  }
}

/**
 * Builds a fresh set of default dictionaries.
 *
 * A new object (with new Set/Map instances) is returned on every call so that
 * analyzers created independently never share mutable dictionaries.
 *
 * NOTE: text is matched one whitespace-separated token at a time, so the
 * multi-word entries below (e.g. 'without a doubt', 'by no means') currently
 * never match; they are kept for a future phrase-aware matcher.
 *
 * @returns {{positiveWords: Set<string>, negativeWords: Set<string>,
 *            intensifiers: Map<string, number>, negators: Set<string>}}
 */
const buildDefaultConfig = () => ({
  positiveWords: new Set([
    // Emotional positives
    'love', 'joy', 'happy', 'excited', 'peaceful', 'wonderful', 'fantastic',
    'delighted', 'pleased', 'glad', 'cheerful', 'content', 'satisfied',
    'grateful', 'thankful', 'blessed', 'optimistic', 'hopeful',

    // Quality positives
    'excellent', 'outstanding', 'superb', 'magnificent', 'brilliant',
    'exceptional', 'perfect', 'remarkable', 'spectacular', 'impressive',
    'incredible', 'amazing', 'extraordinary', 'marvelous', 'wonderful',

    // Performance positives
    'efficient', 'effective', 'reliable', 'innovative', 'productive',
    'successful', 'accomplished', 'achieved', 'improved', 'enhanced',
    'optimized', 'streamlined', 'breakthrough', 'revolutionary',

    // Relationship positives
    'friendly', 'helpful', 'supportive', 'kind', 'generous', 'caring',
    'compassionate', 'thoughtful', 'considerate', 'engaging', 'collaborative',

    // Experience positives
    'enjoyable', 'fun', 'entertaining', 'engaging', 'interesting', 'fascinating',
    'captivating', 'inspiring', 'motivating', 'enriching', 'rewarding',

    // Growth positives
    'growing', 'improving', 'developing', 'advancing', 'progressing',
    'evolving', 'flourishing', 'thriving', 'prospering', 'succeeding',
  ]),

  negativeWords: new Set([
    // Emotional negatives
    'hate', 'angry', 'sad', 'upset', 'frustrated', 'disappointed', 'anxious',
    'worried', 'stressed', 'depressed', 'miserable', 'unhappy', 'distressed',
    'irritated', 'annoyed', 'furious', 'outraged', 'bitter',

    // Quality negatives
    'poor', 'bad', 'terrible', 'horrible', 'awful', 'dreadful', 'inferior',
    'mediocre', 'subpar', 'unacceptable', 'disappointing', 'inadequate',
    'deficient', 'flawed', 'defective',

    // Performance negatives
    'inefficient', 'ineffective', 'unreliable', 'problematic', 'failing',
    'broken', 'malfunctioning', 'corrupted', 'crashed', 'buggy', 'error',
    'failed', 'unsuccessful', 'unproductive',

    // Relationship negatives
    'hostile', 'unfriendly', 'unhelpful', 'rude', 'mean', 'cruel', 'harsh',
    'inconsiderate', 'selfish', 'aggressive', 'confrontational', 'toxic',

    // Experience negatives
    'boring', 'dull', 'tedious', 'monotonous', 'uninteresting', 'tiresome',
    'exhausting', 'frustrating', 'confusing', 'complicated', 'difficult',

    // Decline negatives
    'declining', 'deteriorating', 'worsening', 'failing', 'regressing',
    'degrading', 'diminishing', 'decreasing', 'falling', 'shrinking',
  ]),

  intensifiers: new Map([
    // Strong intensifiers
    ['extremely', 2.0],
    ['absolutely', 2.0],
    ['completely', 2.0],
    ['totally', 2.0],
    ['entirely', 2.0],
    ['utterly', 2.0],

    // Moderate intensifiers
    ['very', 1.5],
    ['really', 1.5],
    ['particularly', 1.5],
    ['especially', 1.5],
    ['notably', 1.5],
    ['significantly', 1.5],

    // Mild intensifiers
    ['quite', 1.25],
    ['rather', 1.25],
    ['somewhat', 1.25],
    ['fairly', 1.25],
    ['pretty', 1.25],
    ['relatively', 1.25],

    // Emphatic phrases
    ['without a doubt', 2.0],
    ['beyond question', 2.0],
    ['by far', 1.75],
    ['to a great extent', 1.75],
  ]),

  negators: new Set([
    // Direct negators
    'not', 'no', 'never', 'none', 'neither', 'nor', 'nothing',

    // Contracted negators ("n't" acts as a suffix rule, see analyzeText)
    "n't", 'cannot', "won't", "wouldn't", "shouldn't", "couldn't", "haven't",
    "hasn't", "didn't", "isn't", "aren't", "weren't",

    // Complex negators
    'hardly', 'scarcely', 'barely', 'rarely', 'seldom', 'few', 'little',
    'nowhere', 'nobody', 'none', 'by no means', 'on no account',

    // Implicit negators
    'deny', 'reject', 'refuse', 'prevent', 'avoid', 'stop', 'exclude',
    'doubt', 'question', 'dispute',
  ]),
});

/**
 * Result returned when there is nothing to analyze (invalid or empty input).
 *
 * @returns {Object} Analysis object with every counter at zero
 */
const createEmptyAnalysis = () => ({
  score: 0,
  words: [],
  summary: { positive: 0, negative: 0, neutral: 0, total: 0 },
  sentiment: 'Neutral',
  topEmotions: [],
  intensity: 'None',
  wordCount: 0,
  averageSentiment: 0,
});

/**
 * Maps a numeric score to a sentiment label.
 *
 * Only the sign is used; magnitude is reported separately via `intensity`.
 *
 * @param {number} score - Aggregate sentiment score
 * @returns {'Positive'|'Negative'|'Neutral'} Sentiment label
 */
const scoreToTone = (score) => {
  if (score > 0) return 'Positive';
  if (score < 0) return 'Negative';
  return 'Neutral';
};

/**
 * Derives an intensity label from score magnitude and intensifier usage.
 *
 * @param {number} score - Aggregate sentiment score
 * @param {number} intensifierCount - Number of intensifier tokens seen
 * @returns {string} 'Very Strong' | 'Strong' | 'Moderate' | 'Mild' | 'Neutral'
 */
const getIntensity = (score, intensifierCount) => {
  const magnitude = Math.abs(score);
  if (magnitude > 10 || intensifierCount > 5) return 'Very Strong';
  if (magnitude > 7 || intensifierCount > 3) return 'Strong';
  if (magnitude > 4 || intensifierCount > 1) return 'Moderate';
  if (magnitude > 0) return 'Mild';
  return 'Neutral';
};

/**
 * Splits text into lowercase word tokens with punctuation removed.
 *
 * Empty tokens (from leading/trailing whitespace or punctuation-only runs) are
 * dropped so they do not inflate word counts.
 *
 * @param {string} text - Raw text
 * @returns {string[]} Word tokens
 */
const tokenize = (text) =>
  text
    .toLowerCase()
    .replace(CURLY_APOSTROPHE_PATTERN, "'")
    .replace(PUNCTUATION_PATTERN, '')
    .split(/\s+/)
    .filter(Boolean);

/**
 * Creates a web-enabled sentiment analyzer with extended capabilities.
 *
 * @param {Object} [config] - Optional dictionaries overriding the defaults.
 *   Any of `positiveWords` (Set), `negativeWords` (Set), `intensifiers` (Map)
 *   and `negators` (Set); a provided dictionary replaces the default one.
 * @returns {Object} Analyzer instance exposing `analyzeText`, `analyzeUrl`,
 *   dictionary mutators and `getConfig` / `getDictionaries`.
 */
const createWebSentimentAnalyzer = (config = {}) => {
  const finalConfig = { ...buildDefaultConfig(), ...config };

  /**
   * Core dictionary-based analyzer.
   *
   * Dictionaries are read from `dictionaries` on every call, so words added
   * later through the public mutators take effect immediately.
   *
   * @param {Object} dictionaries - Word dictionaries (see `buildDefaultConfig`)
   * @returns {Object} Text analysis methods
   */
  const createSentimentAnalyzer = (dictionaries) => {
    // A token negates when it is a listed negator, or when the "n't" entry is
    // present and the token is a contraction ending in it (e.g. "don't").
    const isNegator = (word) =>
      dictionaries.negators.has(word) ||
      (dictionaries.negators.has("n't") && word.endsWith("n't"));

    /**
     * Scores a piece of text.
     *
     * Modifier words (intensifiers, negators) affect only the next
     * non-modifier token; the multiplier is reset after each such token.
     *
     * @param {string} text - Text to analyze
     * @returns {Object} score, per-word details, counts, sentiment label,
     *   top emotions, intensity label, word count and average sentiment
     */
    const analyzeText = (text) => {
      if (!text || typeof text !== 'string') {
        console.warn('Invalid input to analyzeText:', text);
        return createEmptyAnalysis();
      }

      const words = tokenize(text);
      const analyzedWords = [];
      const emotionCounts = new Map();
      let score = 0;
      let multiplier = 1;
      let positiveCount = 0;
      let negativeCount = 0;
      let intensifierCount = 0;

      for (const word of words) {
        if (dictionaries.intensifiers.has(word)) {
          // Keep the sign so that "not very happy" stays negated.
          const sign = multiplier < 0 ? -1 : 1;
          multiplier = sign * dictionaries.intensifiers.get(word);
          intensifierCount++;
          continue;
        }

        if (isNegator(word)) {
          multiplier *= -1;
          continue;
        }

        let polarity = 0;
        if (dictionaries.positiveWords.has(word)) {
          polarity = 1;
          positiveCount++;
        } else if (dictionaries.negativeWords.has(word)) {
          polarity = -1;
          negativeCount++;
        }

        if (polarity !== 0) {
          const wordScore = polarity * multiplier;
          const category = EMOTION_BY_WORD.get(word) ?? null;
          score += wordScore;
          if (category) {
            emotionCounts.set(category, (emotionCounts.get(category) || 0) + 1);
          }
          if (wordScore !== 0 || category) {
            analyzedWords.push({ word, score: wordScore, multiplier, category });
          }
        }

        // Modifiers apply only to the token that directly follows them.
        multiplier = 1;
      }

      const topEmotions = Array.from(emotionCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .slice(0, 3)
        .map(([emotion, count]) => ({ emotion, count }));

      return {
        score,
        words: analyzedWords,
        summary: {
          positive: positiveCount,
          negative: negativeCount,
          neutral: words.length - positiveCount - negativeCount,
          total: words.length,
        },
        sentiment: scoreToTone(score),
        topEmotions,
        intensity: getIntensity(score, intensifierCount),
        wordCount: words.length,
        // `|| 0` covers the 0 / 0 case when the text contains no tokens.
        averageSentiment: score / words.length || 0,
      };
    };

    return {
      analyzeText,
      calculateSentimentScore: (text) => analyzeText(text).score,
      getEmotionalTone: (text) => analyzeText(text).sentiment,
      getTopWords: (text) => analyzeText(text).words,
    };
  };

  const { analyzeText } = createSentimentAnalyzer(finalConfig);

  /**
   * Downloads a page and parses it into a DOM document.
   *
   * @param {string} url - Page URL
   * @returns {Promise<Document>} Parsed document
   * @throws {Error} When the request fails or returns a non-2xx status
   */
  const loadDocument = async (url) => {
    try {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`);
      }
      const html = await response.text();
      const { JSDOM } = await import('jsdom');
      return new JSDOM(html).window.document;
    } catch (error) {
      throw new Error(`Failed to fetch content: ${error.message}`, { cause: error });
    }
  };

  /**
   * Extracts the main readable text of a document.
   *
   * Tries `CONTENT_SELECTORS` in order and falls back to the whole body.
   * Elements inside `nav`, `header` or `footer` are skipped.
   * Mutates `doc`: script/style-like elements are removed first because
   * `textContent` would otherwise include their source text.
   *
   * @param {Document} doc - DOM document
   * @returns {string} Whitespace-normalized text ('' when none is found)
   */
  const extractContent = (doc) => {
    const normalize = (raw) => raw.replace(/\s+/g, ' ').trim();

    doc.querySelectorAll('script, style, noscript, template').forEach((node) => node.remove());

    for (const selector of CONTENT_SELECTORS) {
      const parts = [];
      for (const element of doc.querySelectorAll(selector)) {
        if (!element.closest('nav, header, footer')) {
          parts.push(element.textContent);
        }
      }
      const text = normalize(parts.join(' '));
      if (text) return text;
    }

    return normalize(doc.body?.textContent ?? '');
  };

  /**
   * Extracts metadata from a document.
   *
   * Reads standard meta tags and OpenGraph / article properties, then falls
   * back to common author and date elements when the tags are missing.
   *
   * @param {Document} doc - DOM document
   * @returns {{title: string, description: string, author: string,
   *            date: string, keywords: string[]}} Extracted metadata
   */
  const extractMetadata = (doc) => {
    const metadata = {
      title: '',
      description: '',
      author: '',
      date: '',
      keywords: [],
    };

    for (const tag of doc.querySelectorAll('meta')) {
      const name = tag.getAttribute('name')?.toLowerCase();
      const property = tag.getAttribute('property')?.toLowerCase();
      const content = tag.getAttribute('content');
      if (!content) continue;

      if (name === 'description' || property === 'og:description') {
        metadata.description = content;
      }
      if (name === 'author' || property === 'article:author') {
        metadata.author = content;
      }
      if (name === 'keywords') {
        metadata.keywords = content.split(',').map((keyword) => keyword.trim());
      }
      if (
        name === 'date' ||
        property === 'article:published_time' ||
        property === 'article:modified_time'
      ) {
        metadata.date = content;
      }
    }

    // Title sources in order of preference: OpenGraph, first <h1>, <title>.
    metadata.title =
      doc.querySelector('meta[property="og:title"]')?.getAttribute('content') ||
      doc.querySelector('h1')?.textContent.trim() ||
      doc.title ||
      '';

    if (!metadata.author) {
      const authorElement = doc.querySelector('[rel="author"], .author, .byline');
      if (authorElement) {
        metadata.author = authorElement.textContent.trim();
      }
    }

    if (!metadata.date) {
      const dateElement = doc.querySelector('time, .date, .published');
      if (dateElement) {
        metadata.date = dateElement.getAttribute('datetime') || dateElement.textContent.trim();
      }
    }

    return metadata;
  };

  /**
   * Analyzes the sentiment of a web page.
   *
   * The page is downloaded and parsed once; both metadata and text are taken
   * from that single document.
   *
   * @param {string} url - URL to analyze
   * @returns {Promise<Object>} `analyzeText` result plus `url`, `metadata`
   *   and `fetchDate` (ISO timestamp)
   * @throws {Error} When the page cannot be fetched or parsed
   */
  const analyzeUrl = async (url) => {
    try {
      const doc = await loadDocument(url);
      // Metadata first: extractContent strips elements from the document.
      const metadata = extractMetadata(doc);
      const content = extractContent(doc);

      if (!content) {
        console.warn(`No content found for URL: ${url}`);
      }

      const analysis = content ? analyzeText(content) : createEmptyAnalysis();
      return { ...analysis, url, metadata, fetchDate: new Date().toISOString() };
    } catch (error) {
      throw new Error(`Analysis failed: ${error.message}`, { cause: error });
    }
  };

  return {
    analyzeText,
    analyzeUrl,
    addPositiveWords: (words) => words.forEach((word) => finalConfig.positiveWords.add(word)),
    addNegativeWords: (words) => words.forEach((word) => finalConfig.negativeWords.add(word)),
    addIntensifier: (word, multiplier) => finalConfig.intensifiers.set(word, multiplier),
    addNegator: (word) => finalConfig.negators.add(word),
    getConfig: () => ({ ...finalConfig }),
    getDictionaries: () => ({
      positiveCount: finalConfig.positiveWords.size,
      negativeCount: finalConfig.negativeWords.size,
      intensifierCount: finalConfig.intensifiers.size,
      negatorCount: finalConfig.negators.size,
    }),
  };
};

// Shared analyzer instance used by analyzeWebPage.
const analyzer = createWebSentimentAnalyzer();

/**
 * Creates a visual representation of a sentiment score.
 *
 * The score is clamped to [-10, 10] for positioning only; the printed value is
 * the real score. The marker is always drawn, including at both ends of the
 * scale and on the center line.
 *
 * @param {number} score - Sentiment score to visualize
 * @returns {string} Multi-line text scale
 */
const createSentimentScale = (score) => {
  const width = 40;
  const middle = Math.floor(width / 2);
  const safeScore = Number.isFinite(score) ? score : 0;
  const clampedScore = Math.max(-10, Math.min(10, safeScore));
  // Clamp to the last cell: +10 would otherwise land one past the end.
  const position = Math.min(
    width - 1,
    Math.max(0, Math.round(middle + (clampedScore * middle) / 10)),
  );

  let scale = '';
  for (let i = 0; i < width; i++) {
    if (i === position) scale += '●';
    else if (i === middle) scale += '│';
    else scale += '─';
  }

  return `
NEGATIVE ${' '.repeat(middle - 5)}NEUTRAL${' '.repeat(middle - 5)} POSITIVE
[-10] ${scale} [+10]
Score: ${safeScore.toFixed(2)}
`;
};

/**
 * Formats analysis results as a human-readable report.
 *
 * Accepts results from `analyzeUrl` as well as plain `analyzeText` results
 * (which carry no `metadata` / `url`).
 *
 * @param {Object} analysis - Analysis results to format
 * @returns {string} Formatted report
 */
const formatAnalysisResults = (analysis) => {
  const {
    score,
    summary,
    sentiment,
    topEmotions,
    intensity,
    wordCount,
    words,
    metadata = {},
    url = '',
  } = analysis;

  const emotionLines = topEmotions
    .map((e) => `• ${e.emotion} (mentioned ${e.count} time${e.count > 1 ? 's' : ''})`)
    .join('\n');

  const notableWordLines = words
    .filter((w) => w.score !== 0)
    .slice(0, 5)
    .map((w) => `• "${w.word}" (${w.score > 0 ? 'positive' : 'negative'}, ${w.category || 'general'})`)
    .join('\n');

  return `
=== Sentiment Analysis for ${metadata.title || url || '(untitled)'} ===

${createSentimentScale(score)}

Overall Assessment:
• Sentiment: ${sentiment} (${intensity})
• Total Words Analyzed: ${wordCount}

Word Breakdown:
• Positive Words: ${summary.positive}
• Negative Words: ${summary.negative}
• Neutral Words: ${summary.neutral}

${topEmotions.length ? `Dominant Emotions:
${emotionLines}` : ''}

Content Details:
• Author: ${metadata.author || 'Not specified'}
• Date: ${metadata.date || 'Not specified'}
${metadata.description ? `• Description: ${metadata.description}` : ''}

Notable Words:
${notableWordLines}

${'-'.repeat(60)}
`;
};

/**
 * Analyzes one URL and prints the report; failures are reported on stderr
 * and never thrown, so a batch run continues with the next URL.
 *
 * @param {string} url - URL to analyze
 * @returns {Promise<void>}
 */
const analyzeWebPage = async (url) => {
  try {
    const analysis = await analyzer.analyzeUrl(url);
    console.log(formatAnalysisResults(analysis));
  } catch (error) {
    console.error(`\n❌ Analysis failed for ${url}:`, error.message);
  }
};

// Example:
// analyzeWebPage('https://example.com/blog-post');

// Add custom words:
// analyzer.addPositiveWords(['groundbreaking', 'game-changing']);
// analyzer.addNegativeWords(['concerning', 'questionable']);
// analyzer.addIntensifier('incredibly', 1.8);
// analyzer.addNegator('lacks');

// Get dictionary stats:
// console.log(analyzer.getDictionaries());

const helpText = `
Sentiment Analyzer
=================

Analyzes the sentiment of web pages and provides detailed emotional analysis.

Usage:
  bun run app.js <url>
  bun run app.js <url1> <url2> <url3> ...

Example:
  bun run app.js https://example.com/blog-post
  bun run app.js https://blog1.com https://blog2.com

Options:
  --help, -h    Show this help message
`;

/**
 * CLI program entry point.
 *
 * Prints the help text when called without arguments or with a help flag;
 * otherwise analyzes every non-flag argument as a URL, one after another.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  // Skip the runtime executable and the script path.
  const args = process.argv.slice(2);

  if (args.length === 0 || args.includes('--help') || args.includes('-h')) {
    console.log(helpText);
    return;
  }

  for (const url of args) {
    // Anything that looks like a flag is not a URL.
    if (url.startsWith('-')) continue;
    await analyzeWebPage(url);
  }
};

// Run the program
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});