diff options
Diffstat (limited to 'js/sentiment')
-rw-r--r-- | js/sentiment/.gitignore | 175 | ||||
-rw-r--r-- | js/sentiment/README.md | 108 | ||||
-rw-r--r-- | js/sentiment/SCRATCH.md | 8 | ||||
-rw-r--r-- | js/sentiment/app.js | 765 | ||||
-rw-r--r-- | js/sentiment/bookmarklet-minified.js | 1 | ||||
-rw-r--r-- | js/sentiment/bookmarklet.js | 8 | ||||
-rwxr-xr-x | js/sentiment/bun.lockb | bin | 0 -> 25383 bytes | |||
-rw-r--r-- | js/sentiment/jsconfig.json | 27 | ||||
-rw-r--r-- | js/sentiment/package.json | 14 | ||||
-rw-r--r-- | js/sentiment/sentiment/PressStart2P-Regular.ttf | 0 | ||||
-rw-r--r-- | js/sentiment/sentiment/index.html | 224 | ||||
-rw-r--r-- | js/sentiment/sentiment/sentiment.browser.js | 209 |
12 files changed, 1539 insertions, 0 deletions
diff --git a/js/sentiment/.gitignore b/js/sentiment/.gitignore new file mode 100644 index 0000000..9b1ee42 --- /dev/null +++ b/js/sentiment/.gitignore @@ -0,0 +1,175 @@ +# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore + +# Logs + +logs +_.log +npm-debug.log_ +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Caches + +.cache + +# Diagnostic reports (https://nodejs.org/api/report.html) + +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# Runtime data + +pids +_.pid +_.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover + +lib-cov + +# Coverage directory used by tools like istanbul + +coverage +*.lcov + +# nyc test coverage + +.nyc_output + +# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) + +.grunt + +# Bower dependency directory (https://bower.io/) + +bower_components + +# node-waf configuration + +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) + +build/Release + +# Dependency directories + +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) + +web_modules/ + +# TypeScript cache + +*.tsbuildinfo + +# Optional npm cache directory + +.npm + +# Optional eslint cache + +.eslintcache + +# Optional stylelint cache + +.stylelintcache + +# Microbundle cache + +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history + +.node_repl_history + +# Output of 'npm pack' + +*.tgz + +# Yarn Integrity file + +.yarn-integrity + +# dotenv environment variable files + +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) + +.parcel-cache + +# Next.js build output + +.next +out + +# Nuxt.js build / generate output + +.nuxt +dist + +# Gatsby files + +# Comment in the public line in if your project uses Gatsby and not Next.js + +# 
https://nextjs.org/blog/next-9-1#public-directory-support + +# public + +# vuepress build output + +.vuepress/dist + +# vuepress v2.x temp and cache directory + +.temp + +# Docusaurus cache and generated files + +.docusaurus + +# Serverless directories + +.serverless/ + +# FuseBox cache + +.fusebox/ + +# DynamoDB Local files + +.dynamodb/ + +# TernJS port file + +.tern-port + +# Stores VSCode versions used for testing VSCode extensions + +.vscode-test + +# yarn v2 + +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/js/sentiment/README.md b/js/sentiment/README.md new file mode 100644 index 0000000..141470d --- /dev/null +++ b/js/sentiment/README.md @@ -0,0 +1,108 @@ +# Sentiment Analyzer + +## Overview + +The Sentiment Analyzer is a JavaScript application designed to analyze the sentiment of web content. It utilizes a combination of dictionary-based sentiment analysis, emotion categorization, intensity analysis, web content extraction, and metadata parsing to provide a comprehensive emotional analysis of text from web pages. + +### Key Features + +- **Emotion Categorization**: Classifies emotions into various categories such as joy, sadness, anger, and more. +- **Intensity Analysis**: Measures the intensity of sentiments based on the context and usage of words. +- **Web Content Extraction**: Fetches and extracts meaningful content from web pages, ignoring irrelevant sections like headers and footers. +- **Metadata Parsing**: Extracts useful metadata such as titles, authors, and publication dates from web pages. 
+ +## Installation + +To install dependencies, run: + +```bash +bun install +``` + +## Usage + +To run the sentiment analyzer, use the following command: + +```bash +bun run app.js <url> +``` + +You can also analyze multiple URLs at once: + +```bash +bun run app.js <url1> <url2> <url3> +``` + +### Example + +```bash +bun run app.js https://example.com/blog-post +``` + +### Help + +To display help information, use: + +```bash +bun run app.js --help +``` + +## Building a Static Binary + +Bun allows you to build your application as a static binary, which can be distributed and run without requiring a separate runtime environment. To build the Sentiment Analyzer as a binary, follow these steps: + +1. **Build the Binary**: Run the following command in your terminal: + + ```bash + bun build app.js --outdir ./bin --target node + ``` + + This command compiles your application into a single binary executable for Node.js and places it in the `./bin` directory. + +2. **Run the Binary**: After building, you can run the binary directly: + + ```bash + ./bin/app.js <url> + ``` + + Or for multiple URLs: + + ```bash + ./bin/app.js <url1> <url2> <url3> + ``` + +### Example + +```bash +./bin/app.js https://example.com/blog-post +``` + +## Extending the Program + +The Sentiment Analyzer is designed to be extensible. Here are some ways you can enhance its functionality: + +1. **Add More Dictionaries**: You can extend the positive and negative word dictionaries by adding more words or phrases relevant to specific contexts or industries. + +2. **Enhance Emotion Categories**: Modify the `emotionCategories` object in `app.js` to include additional emotions or synonyms that are relevant to your analysis needs. + +3. **Implement Machine Learning**: Consider integrating machine learning models for more advanced sentiment analysis that can learn from context and improve over time. + +4. 
**Support for Multiple Languages**: Extend the program to support sentiment analysis in different languages by adding language-specific dictionaries and rules. + +5. **Dynamic Content Handling**: Improve the content extraction logic to handle dynamic web pages (Single Page Applications) that load content asynchronously. + +6. **Batch Processing**: Implement functionality to read URLs from a file and process them in batches, which can be useful for analyzing large datasets. + +7. **Output Formatting Options**: Add options to format the output in different ways (e.g., JSON, CSV) for easier integration with other tools or systems. + +## Contributing + +Contributions are welcome! If you have suggestions for improvements or new features, feel free to open an issue or submit a pull request. + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. + +## Acknowledgments + +This project was created using `bun init` in bun v1.1.29. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime. diff --git a/js/sentiment/SCRATCH.md b/js/sentiment/SCRATCH.md new file mode 100644 index 0000000..d5d4ca3 --- /dev/null +++ b/js/sentiment/SCRATCH.md @@ -0,0 +1,8 @@ +<https://getthematic.com/sentiment-analysis#how-does-sentiment-analysis-work> + +# Rule-based sentiment analysis works like this: + +1. “Lexicons” or lists of positive and negative words are created. These are words that are used to describe sentiment. For example, positive lexicons might include “fast”, “affordable”, and “user-friendly“. Negative lexicons could include “slow”, “pricey”, and “complicated”. +2. Before text can be analyzed it needs to be prepared. Several processes are used to format the text in a way that a machine can understand. Tokenization breaks up text into small chunks called tokens. Sentence tokenization splits up text into sentences. Word tokenization separates words in a sentence. 
For example, “the best customer service” would be split into “the”, “best”, and “customer service”. Lemmatization can be used to transform words back to their root form. A lemma is the root form of a word. For example, the root form of “is, are, am, were, and been” is “be”. We also want to exclude things which are known but are not useful for sentiment analysis. So another important process is stopword removal which takes out common words like “for, at, a, to”. These words have little or no semantic value in the sentence. Applying these processes makes it easier for computers to understand the text. +3. A computer counts the number of positive or negative words in a particular text. A special rule can make sure that negated words, e.g. “not easy”, are counted as opposites. +4. The final step is to calculate the overall sentiment score for the text. As mentioned previously, this could be based on a scale of -100 to 100. In this case a score of 100 would be the highest score possible for positive sentiment. A score of 0 would indicate neutral sentiment. The score can also be expressed as a percentage, ranging from 0% as negative and 100% as positive. 
\ No newline at end of file diff --git a/js/sentiment/app.js b/js/sentiment/app.js new file mode 100644 index 0000000..1f66d92 --- /dev/null +++ b/js/sentiment/app.js @@ -0,0 +1,765 @@ +/** + * Web Sentiment Analyzer + * + * This program analyzes the sentiment of web content using a combination of: + * - Dictionary-based sentiment analysis + * - Emotion categorization + * - Intensity analysis + * - Web content extraction + * - Metadata parsing + * + * Architecture Overview: + * - Factory Pattern: Uses createWebSentimentAnalyzer to create analyzer instances + * - Builder Pattern: Configurable through dictionary additions and modifications + * - Strategy Pattern: Separates content fetching, analysis, and display logic + * - Command Pattern: CLI interface for processing multiple URLs + * + * @module sentiment-analyzer + */ + +import { JSDOM } from 'jsdom'; + +/** + * Creates a web-enabled sentiment analyzer with extended capabilities + * + * @param {Object} config - Optional configuration to override default dictionaries + * @returns {Object} An analyzer instance with public methods for sentiment analysis + * + * Extensibility Points: + * - Add more dictionaries (e.g., industry-specific terms) + * - Enhance emotion categories + * - Add language support + * - Implement ML-based sentiment analysis + */ +const createWebSentimentAnalyzer = (config = {}) => { + /** + * Default configuration with extensive sentiment dictionaries + * + * @property {Set} positiveWords - Words indicating positive sentiment + * @property {Set} negativeWords - Words indicating negative sentiment + * @property {Map} intensifiers - Words that modify sentiment intensity + * @property {Set} negators - Words that negate sentiment + * + * Potential Enhancements: + * - Add multi-word phrases + * - Include context-dependent sentiments + * - Add domain-specific dictionaries + */ + const defaultConfig = { + positiveWords: new Set([ + // Emotional positives + 'love', 'joy', 'happy', 'excited', 'peaceful', 
'wonderful', 'fantastic', + 'delighted', 'pleased', 'glad', 'cheerful', 'content', 'satisfied', + 'grateful', 'thankful', 'blessed', 'optimistic', 'hopeful', + + // Quality positives + 'excellent', 'outstanding', 'superb', 'magnificent', 'brilliant', + 'exceptional', 'perfect', 'remarkable', 'spectacular', 'impressive', + 'incredible', 'amazing', 'extraordinary', 'marvelous', 'wonderful', + + // Performance positives + 'efficient', 'effective', 'reliable', 'innovative', 'productive', + 'successful', 'accomplished', 'achieved', 'improved', 'enhanced', + 'optimized', 'streamlined', 'breakthrough', 'revolutionary', + + // Relationship positives + 'friendly', 'helpful', 'supportive', 'kind', 'generous', 'caring', + 'compassionate', 'thoughtful', 'considerate', 'engaging', 'collaborative', + + // Experience positives + 'enjoyable', 'fun', 'entertaining', 'engaging', 'interesting', 'fascinating', + 'captivating', 'inspiring', 'motivating', 'enriching', 'rewarding', + + // Growth positives + 'growing', 'improving', 'developing', 'advancing', 'progressing', + 'evolving', 'flourishing', 'thriving', 'prospering', 'succeeding' + ]), + + negativeWords: new Set([ + // Emotional negatives + 'hate', 'angry', 'sad', 'upset', 'frustrated', 'disappointed', 'anxious', + 'worried', 'stressed', 'depressed', 'miserable', 'unhappy', 'distressed', + 'irritated', 'annoyed', 'furious', 'outraged', 'bitter', + + // Quality negatives + 'poor', 'bad', 'terrible', 'horrible', 'awful', 'dreadful', 'inferior', + 'mediocre', 'subpar', 'unacceptable', 'disappointing', 'inadequate', + 'deficient', 'flawed', 'defective', + + // Performance negatives + 'inefficient', 'ineffective', 'unreliable', 'problematic', 'failing', + 'broken', 'malfunctioning', 'corrupted', 'crashed', 'buggy', 'error', + 'failed', 'unsuccessful', 'unproductive', + + // Relationship negatives + 'hostile', 'unfriendly', 'unhelpful', 'rude', 'mean', 'cruel', 'harsh', + 'inconsiderate', 'selfish', 'aggressive', 'confrontational', 
'toxic', + + // Experience negatives + 'boring', 'dull', 'tedious', 'monotonous', 'uninteresting', 'tiresome', + 'exhausting', 'frustrating', 'confusing', 'complicated', 'difficult', + + // Decline negatives + 'declining', 'deteriorating', 'worsening', 'failing', 'regressing', + 'degrading', 'diminishing', 'decreasing', 'falling', 'shrinking' + ]), + + intensifiers: new Map([ + // Strong intensifiers + ['extremely', 2.0], + ['absolutely', 2.0], + ['completely', 2.0], + ['totally', 2.0], + ['entirely', 2.0], + ['utterly', 2.0], + + // Moderate intensifiers + ['very', 1.5], + ['really', 1.5], + ['particularly', 1.5], + ['especially', 1.5], + ['notably', 1.5], + ['significantly', 1.5], + + // Mild intensifiers + ['quite', 1.25], + ['rather', 1.25], + ['somewhat', 1.25], + ['fairly', 1.25], + ['pretty', 1.25], + ['relatively', 1.25], + + // Emphatic phrases + ['without a doubt', 2.0], + ['beyond question', 2.0], + ['by far', 1.75], + ['to a great extent', 1.75] + ]), + + negators: new Set([ + // Direct negators + 'not', 'no', 'never', 'none', 'neither', 'nor', 'nothing', + + // Contracted negators + "n't", 'cannot', "won't", "wouldn't", "shouldn't", "couldn't", "haven't", + "hasn't", "didn't", "isn't", "aren't", "weren't", + + // Complex negators + 'hardly', 'scarcely', 'barely', 'rarely', 'seldom', 'few', 'little', + 'nowhere', 'nobody', 'none', 'by no means', 'on no account', + + // Implicit negators + 'deny', 'reject', 'refuse', 'prevent', 'avoid', 'stop', 'exclude', + 'doubt', 'question', 'dispute' + ]) + }; + + // Merge with provided config + const finalConfig = { ...defaultConfig, ...config }; + + /** + * Core sentiment analyzer implementation + * + * @param {Object} config - Configuration object with word dictionaries + * @returns {Object} Methods for text analysis + * + * Implementation Notes: + * - Uses a sliding window approach for context analysis + * - Implements multiplier-based intensity scoring + * - Categorizes emotions using predefined taxonomies + */ 
+ const createSentimentAnalyzer = (config) => { + /** + * Main text analysis function + * + * @param {string} text - Text to analyze + * @returns {Object} Comprehensive analysis results + * + * Potential Improvements: + * - Add sentence-level analysis + * - Implement paragraph breakdown + * - Add statistical confidence scores + * - Consider word positioning and emphasis + */ + const analyzeText = (text) => { + // Ensure text is a string and has content + if (!text || typeof text !== 'string') { + console.warn('Invalid input to analyzeText:', text); + return { + score: 0, + words: [], + summary: { positive: 0, negative: 0, neutral: 0 }, + sentiment: 'Neutral', + topEmotions: [], + intensity: 'None', + wordCount: 0 + }; + } + + const words = text.toLowerCase() + .replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, '') + .split(/\s+/); + + let score = 0; + let multiplier = 1; + const analyzedWords = []; + const emotionCounts = new Map(); + let positiveCount = 0; + let negativeCount = 0; + let intensifierCount = 0; + + // Emotion categories for classification + const emotionCategories = { + // Strong positive emotions (weight: 2.0) + joy: { + weight: 2.0, + words: ['happy', 'joy', 'delighted', 'pleased', 'excited', 'ecstatic'] + }, + love: { + weight: 2.0, + words: ['loving', 'adoring', 'fond', 'affectionate', 'caring'] + }, + + // Moderate positive emotions (weight: 1.5) + satisfaction: { + weight: 1.5, + words: ['content', 'satisfied', 'fulfilled', 'pleased', 'accomplished'] + }, + + // Strong negative emotions (weight: -2.0) + anger: { + weight: -2.0, + words: ['angry', 'furious', 'outraged', 'enraged', 'hostile'] + }, + hate: { + weight: -2.0, + words: ['hate', 'despise', 'loathe', 'detest', 'abhor'] + }, + + // Moderate negative emotions (weight: -1.5) + frustration: { + weight: -1.5, + words: ['frustrated', 'annoyed', 'irritated', 'agitated'] + } + // ... 
other categories with appropriate weights + }; + + for (let i = 0; i < words.length; i++) { + const word = words[i]; + let wordImpact = { + word, + score: 0, + multiplier, + category: null + }; + + // Check for intensifiers + if (config.intensifiers.has(word)) { + multiplier = config.intensifiers.get(word); + intensifierCount++; + continue; + } + + // Check for negators + if (config.negators.has(word)) { + multiplier *= -1; + continue; + } + + // Score the word and categorize emotions + if (config.positiveWords.has(word)) { + // Increase base score for positive words + const baseScore = 2; + const wordScore = baseScore * multiplier; + score += wordScore; + positiveCount++; + wordImpact.score = wordScore; + + // Categorize emotion with weights + for (const [category, {weight, words}] of Object.entries(emotionCategories)) { + if (words.includes(word)) { + wordImpact.category = category; + emotionCounts.set(category, (emotionCounts.get(category) || 0) + 1); + // Add weighted bonus score + score += weight * multiplier; + break; + } + } + + } else if (config.negativeWords.has(word)) { + // Increase base score for negative words + const baseScore = -2; + const wordScore = baseScore * multiplier; + score += wordScore; + negativeCount++; + wordImpact.score = wordScore; + + // Categorize emotion with weights + for (const [category, {weight, words}] of Object.entries(emotionCategories)) { + if (words.includes(word)) { + wordImpact.category = category; + emotionCounts.set(category, (emotionCounts.get(category) || 0) + 1); + // Add weighted bonus score + score -= weight * multiplier; + break; + } + } + } + + if (wordImpact.score !== 0 || wordImpact.category) { + analyzedWords.push(wordImpact); + } + + // Reset multiplier after scoring a word + multiplier = 1; + } + + // Calculate intensity based on score magnitude and intensifier usage + const getIntensity = (score, intensifierCount) => { + const magnitude = Math.abs(score); + if (magnitude > 10 || intensifierCount > 5) return 
'Very Strong'; + if (magnitude > 7 || intensifierCount > 3) return 'Strong'; + if (magnitude > 4 || intensifierCount > 1) return 'Moderate'; + if (magnitude > 0) return 'Mild'; + return 'Neutral'; + }; + + // Get top emotions + const topEmotions = Array.from(emotionCounts.entries()) + .sort((a, b) => b[1] - a[1]) + .slice(0, 3) + .map(([emotion, count]) => ({ emotion, count })); + + // Calculate the final score and clamp it between -10 and 10 + const clampedScore = Math.max(-10, Math.min(10, score)); + + // Only count words that contribute to sentiment + const sentimentWords = positiveCount + negativeCount; + const averageSentiment = sentimentWords > 0 ? clampedScore / sentimentWords : 0; + + return { + score: clampedScore, + words: analyzedWords, + summary: { + positive: positiveCount, + negative: negativeCount, + sentiment_words: sentimentWords, + total: words.length + }, + sentiment: getEmotionalTone(clampedScore), + topEmotions, + intensity: getIntensity(clampedScore, intensifierCount), + wordCount: words.length, + averageSentiment + }; + }; + + return { + analyzeText, + calculateSentimentScore: (text) => analyzeText(text).score, + getEmotionalTone: (text) => analyzeText(text).sentiment, + getTopWords: (text) => analyzeText(text).words + }; + }; + + // Re-use previous sentiment analysis functions + const { analyzeText, calculateSentimentScore, getEmotionalTone, getTopWords } = createSentimentAnalyzer(finalConfig); + + /** + * Fetches and extracts content from web pages + * + * @param {string} url - URL to analyze + * @returns {Promise<string>} Extracted text content + * + * Implementation Notes: + * - Uses progressive enhancement for content selection + * - Implements fallback strategies for content extraction + * - Handles various DOM structures + * + * Potential Enhancements: + * - Add support for dynamic content (SPA) + * - Implement content cleaning rules + * - Add support for paywalled content + * - Handle rate limiting + */ + const fetchContent = async 
(url) => { + try { + const response = await fetch(url); + const html = await response.text(); + + const dom = new JSDOM(html); + const doc = dom.window.document; + + // Enhanced content selectors + const contentSelectors = [ + 'article', + 'main', + '.content', + '.post-content', + '.entry-content', + '.article-content', + '.blog-post', + '.post', + 'article p', + '.content p', + 'p' + ]; + + let content = ''; + for (const selector of contentSelectors) { + const elements = doc.querySelectorAll(selector); + if (elements.length) { + elements.forEach(el => { + // Skip if element contains mostly navigation/header/footer content + if (el.closest('nav') || el.closest('header') || el.closest('footer')) { + return; + } + content += el.textContent + '\n\n'; + }); + if (content.trim().length > 0) break; + } + } + + // If no content was found through selectors, get all text content + if (!content) { + content = doc.body.textContent || ''; + } + + // Clean up the content + content = content + .replace(/\s+/g, ' ') + .replace(/\n\s*\n/g, '\n\n') + .trim(); + + // Ensure we're returning a string + return content || ''; + + } catch (error) { + console.error('Fetch error:', error); + throw new Error(`Failed to fetch content: ${error.message}`); + } + }; + + /** + * Extracts metadata from web documents + * + * @param {Document} doc - DOM document + * @returns {Object} Extracted metadata + * + * Implementation Notes: + * - Supports multiple metadata formats (meta tags, OpenGraph, etc.) 
+ * - Uses fallback strategies for missing data + * + * Potential Improvements: + * - Add schema.org parsing + * - Support more metadata formats + * - Add validation and cleaning + */ + const extractMetadata = (doc) => { + const metadata = { + title: '', + description: '', + author: '', + date: '', + keywords: [] + }; + + // Extract meta tags with enhanced selectors + const metaTags = doc.querySelectorAll('meta'); + metaTags.forEach(tag => { + const name = tag.getAttribute('name')?.toLowerCase(); + const property = tag.getAttribute('property')?.toLowerCase(); + const content = tag.getAttribute('content'); + + if (content) { + if (name === 'description' || property === 'og:description') { + metadata.description = content; + } + if (name === 'author' || property === 'article:author') { + metadata.author = content; + } + if (name === 'keywords') { + metadata.keywords = content.split(',').map(k => k.trim()); + } + if (name === 'date' || property === 'article:published_time' || + property === 'article:modified_time') { + metadata.date = content; + } + } + }); + + // Try different title sources + metadata.title = doc.querySelector('meta[property="og:title"]')?.getAttribute('content') || + doc.querySelector('h1')?.textContent || + doc.title || ''; + + // Try to find author in structured data + const authorElement = doc.querySelector('[rel="author"], .author, .byline'); + if (authorElement && !metadata.author) { + metadata.author = authorElement.textContent.trim(); + } + + // Try to find date in structured data + if (!metadata.date) { + const dateElement = doc.querySelector('time, .date, .published'); + if (dateElement) { + metadata.date = dateElement.getAttribute('datetime') || dateElement.textContent.trim(); + } + } + + return metadata; + }; + + /** + * Analyzes sentiment of web page content + * + * @param {string} url - URL to analyze + * @returns {Promise<Object>} Complete analysis results + * + * Potential Enhancements: + * - Add caching + * - Implement batch 
processing + * - Add historical tracking + */ + const analyzeUrl = async (url) => { + try { + const content = await fetchContent(url); + + if (!content) { + console.warn(`No content found for URL: ${url}`); + return { + score: 0, + words: [], + summary: { positive: 0, negative: 0, neutral: 0 }, + sentiment: 'Neutral', + topEmotions: [], + intensity: 'None', + wordCount: 0, + url, + metadata: {}, + fetchDate: new Date().toISOString() + }; + } + + const analysis = analyzeText(content); + + // Create a new JSDOM instance for metadata extraction + const response = await fetch(url); + const html = await response.text(); + const dom = new JSDOM(html); + + // Additional URL-specific analysis + analysis.url = url; + analysis.metadata = extractMetadata(dom.window.document); + analysis.fetchDate = new Date().toISOString(); + + return analysis; + } catch (error) { + console.error('Analysis error:', error); + throw new Error(`Analysis failed: ${error.message}`); + } + }; + + // Enhanced API + return { + analyzeText, + analyzeUrl, + addPositiveWords: (words) => words.forEach(word => finalConfig.positiveWords.add(word)), + addNegativeWords: (words) => words.forEach(word => finalConfig.negativeWords.add(word)), + addIntensifier: (word, multiplier) => finalConfig.intensifiers.set(word, multiplier), + addNegator: (word) => finalConfig.negators.add(word), + getConfig: () => ({ ...finalConfig }), + getDictionaries: () => ({ + positiveCount: finalConfig.positiveWords.size, + negativeCount: finalConfig.negativeWords.size, + intensifierCount: finalConfig.intensifiers.size, + negatorCount: finalConfig.negators.size + }) + }; + }; + + // Example usage: + const analyzer = createWebSentimentAnalyzer(); + + /** + * Creates a visual representation of sentiment score + * + * @param {number} score - Sentiment score to visualize + * @returns {string} ASCII visualization of sentiment scale + * + * Design Notes: + * - Uses Unicode characters for better visualization + * - Implements fixed-width 
scale for consistent display + * + * Potential Enhancements: + * - Add color support + * - Implement alternative visualizations + * - Add interactive elements + */ + const createSentimentScale = (score) => { + const width = 40; // Width of the scale + const middle = Math.floor(width / 2); + // Clamp score between -10 and 10 for display purposes + const clampedScore = Math.max(-10, Math.min(10, score)); + const position = Math.round(middle + (clampedScore * middle / 10)); + + let scale = ''; + for (let i = 0; i < width; i++) { + if (i === middle) scale += '│'; // Using Unicode box drawing character + else if (i === position) scale += '●'; + else scale += '─'; + } + + // Simpler scale display without arrows and extra spacing + return ` +NEGATIVE ${' '.repeat(middle-5)}NEUTRAL${' '.repeat(middle-5)} POSITIVE +[-10] ${scale} [+10] +Score: ${score.toFixed(2)} +`; + }; + + /** + * Formats analysis results for human readability + * + * @param {Object} analysis - Analysis results to format + * @returns {string} Formatted analysis report + * + * Implementation Notes: + * - Uses structured format for consistency + * - Implements progressive disclosure of details + * + * Potential Improvements: + * - Add output format options (JSON, CSV, etc.) + * - Implement templating system + * - Add internationalization support + */ + const formatAnalysisResults = (analysis) => { + const { + score, + summary, + sentiment, + topEmotions, + intensity, + wordCount, + metadata, + url + } = analysis; + + return ` +=== Sentiment Analysis for ${metadata.title || url} === + +${createSentimentScale(score)} + +Overall Assessment: +• Sentiment: ${sentiment} (${intensity}) +• Total Words Analyzed: ${wordCount} + +Word Breakdown: +• Positive Words: ${summary.positive} +• Negative Words: ${summary.negative} +• Sentiment-Carrying Words: ${summary.sentiment_words} (of ${summary.total} total) + +${topEmotions.length ? 
`Dominant Emotions: +${topEmotions.map(e => `• ${e.emotion} (mentioned ${e.count} time${e.count > 1 ? 's' : ''})`).join('\n')}` : ''} + +Content Details: +• Author: ${metadata.author || 'Not specified'} +• Date: ${metadata.date || 'Not specified'} +${metadata.description ? `• Description: ${metadata.description}` : ''} + +Notable Words: +${analysis.words + .filter(w => w.score !== 0) + .slice(0, 5) + .map(w => `• "${w.word}" (${w.score > 0 ? 'positive' : 'negative'}, ${w.category || 'general'})`) + .join('\n')} + +${'-'.repeat(60)} +`; + }; + + // Update the analyzeWebPage function + const analyzeWebPage = async (url) => { + try { + const analysis = await analyzer.analyzeUrl(url); + console.log(formatAnalysisResults(analysis)); + } catch (error) { + console.error(`\n❌ Analysis failed for ${url}:`, error.message); + } + }; + + // Example: + // analyzeWebPage('https://example.com/blog-post'); + + // Add custom words +// analyzer.addPositiveWords(['groundbreaking', 'game-changing']); +// analyzer.addNegativeWords(['concerning', 'questionable']); +// analyzer.addIntensifier('incredibly', 1.8); +// analyzer.addNegator('lacks'); + +// // Get dictionary stats +// console.log(analyzer.getDictionaries()); + +// Remove the hard-coded URLs and add CLI handling +const helpText = ` +Sentiment Analyzer +================= + +Analyzes the sentiment of web pages and provides detailed emotional analysis. + +Usage: + bun run app.js <url> + bun run app.js <url1> <url2> <url3> ... 
+ +Example: + bun run app.js https://example.com/blog-post + bun run app.js https://blog1.com https://blog2.com + +Options: + --help, -h Show this help message +`; + +/** + * CLI program entry point + * + * Implementation Notes: + * - Uses async/await for proper error handling + * - Implements command pattern for URL processing + * + * Potential Enhancements: + * - Add configuration file support + * - Implement batch processing from file + * - Add progress indicators + * - Add output formatting options + */ +const main = async () => { + // Get command line arguments (skip first two as they're node/bun and script path) + const args = process.argv.slice(2); + + // Show help if no arguments or help flag + if (args.length === 0 || args.includes('--help') || args.includes('-h')) { + console.log(helpText); + return; + } + + // Create analyzer instance + // const analyzer = createWebSentimentAnalyzer(); + + // Analyze each URL + for (const url of args) { + try { + // Skip any help flags that might have been passed + if (url.startsWith('-')) continue; + + await analyzeWebPage(url); + } catch (error) { + console.error(`\n❌ Failed to analyze ${url}:`, error.message); + } + } +}; + +// Run the program +main().catch(error => { + console.error('Fatal error:', error); + process.exit(1); +}); \ No newline at end of file diff --git a/js/sentiment/bookmarklet-minified.js b/js/sentiment/bookmarklet-minified.js new file mode 100644 index 0000000..4671638 --- /dev/null +++ b/js/sentiment/bookmarklet-minified.js @@ -0,0 +1 @@ +javascript:void function(){let e=document.createElement("script");e.src="https://eli.li/_assets/bin/sentiment.browser.js",e.onload=function(){analyzePage()},document.head.appendChild(e)}(); \ No newline at end of file diff --git a/js/sentiment/bookmarklet.js b/js/sentiment/bookmarklet.js new file mode 100644 index 0000000..9209754 --- /dev/null +++ b/js/sentiment/bookmarklet.js @@ -0,0 +1,8 @@ +javascript:(function(){ + const script = 
document.createElement('script'); + script.src = 'https://eli.li/_assets/bin/sentiment.browser.js'; + script.onload = function() { + analyzePage(); + }; + document.head.appendChild(script); +})(); \ No newline at end of file diff --git a/js/sentiment/bun.lockb b/js/sentiment/bun.lockb new file mode 100755 index 0000000..00cbdac --- /dev/null +++ b/js/sentiment/bun.lockb Binary files differdiff --git a/js/sentiment/jsconfig.json b/js/sentiment/jsconfig.json new file mode 100644 index 0000000..238655f --- /dev/null +++ b/js/sentiment/jsconfig.json @@ -0,0 +1,27 @@ +{ + "compilerOptions": { + // Enable latest features + "lib": ["ESNext", "DOM"], + "target": "ESNext", + "module": "ESNext", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} diff --git a/js/sentiment/package.json b/js/sentiment/package.json new file mode 100644 index 0000000..c55b3ee --- /dev/null +++ b/js/sentiment/package.json @@ -0,0 +1,14 @@ +{ + "name": "sentiment", + "module": "app.js", + "type": "module", + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5.0.0" + }, + "dependencies": { + "jsdom": "^26.0.0" + } +} \ No newline at end of file diff --git a/js/sentiment/sentiment/PressStart2P-Regular.ttf b/js/sentiment/sentiment/PressStart2P-Regular.ttf new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/js/sentiment/sentiment/PressStart2P-Regular.ttf diff --git a/js/sentiment/sentiment/index.html b/js/sentiment/sentiment/index.html new file mode 100644 index 0000000..f84d42c --- /dev/null +++ b/js/sentiment/sentiment/index.html 
@@ -0,0 +1,224 @@ +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Naive Sentiment Analyzer</title> + <link href="https://smallandnearlysilent.com/sentiment/PressStart2P-Regular.ttf" rel="stylesheet"> + <style> + @font-face { + font-family: 'Press Start 2P'; + src: url('https://smallandnearlysilent.com/sentiment/PressStart2P-Regular.ttf') format('truetype'); + font-weight: normal; + font-style: normal; + } + :root { + --gba-dark: #081820; + --gba-mid: #346856; + --gba-light: #88c070; + --gba-pale: #e0f8d0; + } + + body { + font-family: 'Press Start 2P', monospace; + line-height: 1.6; + max-width: 800px; + margin: 0 auto; + padding: 20px; + background-color: var(--gba-dark); + color: var(--gba-pale); + font-size: 12px; + } + + /* Retro window styling */ + .window { + border: 4px solid var(--gba-pale); + border-radius: 0; + padding: 20px; + margin: 20px 0; + position: relative; + background: var(--gba-dark); + } + + .window::before { + content: ''; + position: absolute; + top: -8px; + left: -8px; + right: -8px; + bottom: -8px; + border: 2px solid var(--gba-dark); + z-index: -1; + } + + h1 { + color: var(--gba-light); + text-align: center; + margin: 40px 0; + text-transform: uppercase; + letter-spacing: 2px; + text-shadow: + 2px 2px 0 var(--gba-dark), + 4px 4px 0 var(--gba-mid); + } + + h2 { + color: var(--gba-light); + font-size: 14px; + margin-top: 30px; + border-bottom: 4px solid var(--gba-mid); + padding-bottom: 10px; + } + + .bookmarklet { + display: inline-block; + padding: 15px 30px; + background: var(--gba-light); + color: var(--gba-dark); + text-decoration: none; + margin: 20px 0; + cursor: move; + border: 4px solid var(--gba-pale); + box-shadow: + 4px 4px 0 var(--gba-mid), + 8px 8px 0 var(--gba-dark); + transition: all 0.1s ease; + } + + .bookmarklet:hover { + transform: translate(2px, 2px); + box-shadow: + 2px 2px 0 var(--gba-mid), + 6px 6px 0 
var(--gba-dark); + } + + .installation { + border: 4px solid var(--gba-light); + padding: 20px; + margin: 20px 0; + background: var(--gba-mid); + } + + code { + background: var(--gba-dark); + padding: 4px 8px; + border: 2px solid var(--gba-light); + color: var(--gba-pale); + } + + .warning { + border: 4px solid var(--gba-light); + border-style: dashed; + padding: 20px; + margin: 20px 0; + animation: blink 2s infinite; + } + + @keyframes blink { + 0% { border-color: var(--gba-light); } + 50% { border-color: var(--gba-mid); } + 100% { border-color: var(--gba-light); } + } + + ul, ol { + padding-left: 20px; + } + + li { + margin: 10px 0; + position: relative; + } + + li::before { + content: '►'; + position: absolute; + left: -20px; + color: var(--gba-light); + } + + footer { + margin-top: 40px; + padding-top: 20px; + border-top: 4px solid var(--gba-mid); + color: var(--gba-light); + text-align: center; + font-size: 10px; + } + + a { + color: var(--gba-light); + text-decoration: none; + } + + a:hover { + color: var(--gba-pale); + text-decoration: underline; + } + + /* Pixel art decorations */ + .pixel-corner { + position: fixed; + width: 32px; + height: 32px; + background: var(--gba-light); + clip-path: polygon(0 0, 100% 0, 0 100%); + } + + .top-left { top: 0; left: 0; } + .top-right { top: 0; right: 0; transform: rotate(90deg); } + .bottom-left { bottom: 0; left: 0; transform: rotate(-90deg); } + .bottom-right { bottom: 0; right: 0; transform: rotate(180deg); } + + </style> +</head> +<body> + <div class="pixel-corner top-left"></div> + <div class="pixel-corner top-right"></div> + <div class="pixel-corner bottom-left"></div> + <div class="pixel-corner bottom-right"></div> + + <h1>Sentiment Scanner</h1> + + <div class="window"> + <p> + This bookmarklet analyzes the emotional tone of any webpage...badly. 
/**
 * Creates a naive, lexicon-based sentiment analyzer for web pages.
 *
 * Scoring model: every positive word contributes +1 and every negative word
 * contributes -1. Modifiers (intensifiers and negators) that directly precede
 * a word are multiplied together and applied to it, so "not very great"
 * scores -1.5. The modifier state resets after every non-modifier word.
 *
 * @param {Object} [config] - Optional overrides for the default lexicon.
 * @param {Set<string>|string[]} [config.positiveWords] - Words scored +1.
 * @param {Set<string>|string[]} [config.negativeWords] - Words scored -1.
 * @param {Map<string, number>|Array<[string, number]>} [config.intensifiers]
 *   Word -> multiplier applied to the following sentiment word.
 * @param {Set<string>|string[]} [config.negators] - Words that flip the sign
 *   of the following sentiment word. If the set contains "n't", any token
 *   ending in "n't" (isn't, don't, ...) is treated as a negator as well.
 * @returns {{analyzeText: Function, extractPageContent: Function, createResultsOverlay: Function}}
 */
const createWebSentimentAnalyzer = (config = {}) => {
  const defaultConfig = {
    positiveWords: new Set([
      'love', 'joy', 'happy', 'excited', 'peaceful', 'wonderful', 'fantastic',
      'excellent', 'outstanding', 'superb', 'brilliant', 'helpful', 'great',
      'efficient', 'effective', 'reliable', 'innovative', 'productive',
      'friendly', 'supportive', 'kind', 'generous', 'caring'
    ]),

    negativeWords: new Set([
      'hate', 'angry', 'sad', 'upset', 'frustrated', 'disappointed', 'anxious',
      'poor', 'bad', 'terrible', 'horrible', 'awful', 'dreadful', 'inferior',
      'inefficient', 'ineffective', 'unreliable', 'problematic', 'failing',
      'hostile', 'unfriendly', 'unhelpful', 'rude', 'mean'
    ]),

    intensifiers: new Map([
      ['extremely', 2.0],
      ['very', 1.5],
      ['really', 1.5],
      ['quite', 1.25]
    ]),

    negators: new Set([
      'not', 'no', 'never', 'none',
      "n't", 'cannot', "won't", "wouldn't"
    ])
  };

  // Plain arrays are accepted as a convenience; Set/Map instances (the
  // original contract) are passed through untouched.
  const toSet = (value) => (Array.isArray(value) ? new Set(value) : value);
  const toMap = (value) => (Array.isArray(value) ? new Map(value) : value);

  const overrides = config || {};
  const finalConfig = {
    ...defaultConfig,
    ...overrides,
    // A missing (or explicitly undefined) override falls back to the default
    // instead of leaving the analyzer with an unusable lexicon.
    positiveWords: toSet(overrides.positiveWords || defaultConfig.positiveWords),
    negativeWords: toSet(overrides.negativeWords || defaultConfig.negativeWords),
    intensifiers: toMap(overrides.intensifiers || defaultConfig.intensifiers),
    negators: toSet(overrides.negators || defaultConfig.negators)
  };

  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };

  /** Escapes a value for safe interpolation into an HTML string. */
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  /**
   * Splits text into lower-cased word tokens.
   *
   * Tokens are runs of letters/digits, optionally joined by apostrophes so
   * contractions ("won't") stay intact. Everything else - including "?",
   * quotes, brackets and dashes - is a separator, and no empty tokens are
   * produced for leading/trailing whitespace.
   */
  const tokenize = (text) =>
    text
      .toLowerCase()
      // Web pages commonly use typographic apostrophes; normalize them so
      // "isn’t" and "isn't" are the same token.
      .replace(/[\u2018\u2019]/g, "'")
      .match(/[\p{L}\p{N}]+(?:'[\p{L}\p{N}]+)*/gu) || [];

  /** True when the token flips the polarity of the next sentiment word. */
  const isNegator = (word) =>
    finalConfig.negators.has(word) ||
    (finalConfig.negators.has("n't") && word.endsWith("n't"));

  /** Maps score magnitude / intensifier usage to a coarse intensity label. */
  const getIntensity = (score, intensifierCount) => {
    const magnitude = Math.abs(score);
    if (magnitude > 10 || intensifierCount > 5) return 'Very Strong';
    if (magnitude > 7 || intensifierCount > 3) return 'Strong';
    if (magnitude > 4 || intensifierCount > 1) return 'Moderate';
    if (magnitude > 0) return 'Mild';
    return 'Neutral';
  };

  /** Maps a numeric score to a sentiment label. */
  const getSentiment = (score) => {
    if (score > 5) return 'Very Positive';
    if (score > 0) return 'Positive';
    if (score < -5) return 'Very Negative';
    if (score < 0) return 'Negative';
    return 'Neutral';
  };

  /** Result returned when there is nothing to analyze. */
  const emptyAnalysis = () => ({
    score: 0,
    summary: { positive: 0, negative: 0, neutral: 0, total: 0 },
    sentiment: 'Neutral',
    intensity: 'None',
    wordCount: 0
  });

  /**
   * Scores a piece of text.
   *
   * @param {string} text - Text to analyze. Non-strings, empty strings and
   *   text without any word tokens yield the empty result.
   * @returns {{score: number, summary: {positive: number, negative: number,
   *   neutral: number, total: number}, sentiment: string, intensity: string,
   *   wordCount: number}}
   */
  const analyzeText = (text) => {
    if (!text || typeof text !== 'string') {
      return emptyAnalysis();
    }

    const words = tokenize(text);
    if (words.length === 0) {
      return emptyAnalysis();
    }

    let score = 0;
    let multiplier = 1;
    let positiveCount = 0;
    let negativeCount = 0;
    let intensifierCount = 0;

    for (const word of words) {
      if (finalConfig.intensifiers.has(word)) {
        // Multiply rather than assign so a preceding negator is not lost
        // ("not very great" must stay negative).
        multiplier *= finalConfig.intensifiers.get(word);
        intensifierCount++;
        continue;
      }

      if (isNegator(word)) {
        multiplier *= -1;
        continue;
      }

      if (finalConfig.positiveWords.has(word)) {
        score += multiplier;
        positiveCount++;
      } else if (finalConfig.negativeWords.has(word)) {
        score -= multiplier;
        negativeCount++;
      }

      // Modifiers only apply to the word immediately following them.
      multiplier = 1;
    }

    return {
      score,
      summary: {
        positive: positiveCount,
        negative: negativeCount,
        // Everything that is not a lexicon hit, modifiers included.
        neutral: words.length - positiveCount - negativeCount,
        total: words.length
      },
      sentiment: getSentiment(score),
      intensity: getIntensity(score, intensifierCount),
      wordCount: words.length
    };
  };

  /**
   * Extracts the main readable text of the current document.
   *
   * Selectors are tried from most to least specific; the first one that
   * yields non-blank text outside of nav/header/footer wins. If none does,
   * the whole body text is used.
   *
   * @returns {string} Text with all whitespace runs collapsed to one space.
   */
  const extractPageContent = () => {
    // Priority content selectors
    const contentSelectors = [
      'article',
      'main',
      '.content',
      '.post-content',
      'article p',
      '.content p',
      'p'
    ];

    let content = '';
    for (const selector of contentSelectors) {
      document.querySelectorAll(selector).forEach((el) => {
        if (!el.closest('nav, header, footer')) {
          content += `${el.textContent}\n\n`;
        }
      });
      if (content.trim().length > 0) break;
    }

    // Compare the trimmed text: blank matches leave only separators behind,
    // which must not suppress the body fallback.
    if (content.trim().length === 0) {
      content = (document.body && document.body.textContent) || '';
    }

    return content.replace(/\s+/g, ' ').trim();
  };

  /**
   * Renders the analysis in a fixed-position overlay appended to the page.
   *
   * @param {Object} analysis - A result object as returned by analyzeText.
   * @returns {HTMLElement} The overlay element that was added to the page.
   */
  const createResultsOverlay = (analysis) => {
    const overlay = document.createElement('div');
    overlay.style.cssText = `
      position: fixed;
      top: 20px;
      right: 20px;
      max-width: 400px;
      background: white;
      border: 2px solid #ccc;
      border-radius: 8px;
      padding: 20px;
      box-shadow: 0 4px 12px rgba(0,0,0,0.15);
      z-index: 999999;
      font-family: Arial, sans-serif;
      font-size: 14px;
      line-height: 1.4;
    `;

    const closeButton = document.createElement('button');
    closeButton.textContent = '×';
    closeButton.style.cssText = `
      position: absolute;
      top: 10px;
      right: 10px;
      border: none;
      background: none;
      font-size: 20px;
      cursor: pointer;
      color: #666;
    `;
    closeButton.onclick = () => overlay.remove();

    // Values are escaped because this is a public entry point and the
    // analysis object may not have been produced by analyzeText.
    const content = document.createElement('div');
    content.innerHTML = `
      <h2 style="margin: 0 0 15px 0; color: #333;">Sentiment Analysis</h2>
      <p style="margin: 0 0 10px 0;"><strong>Overall Sentiment:</strong> ${escapeHtml(analysis.sentiment)}</p>
      <p style="margin: 0 0 10px 0;"><strong>Intensity:</strong> ${escapeHtml(analysis.intensity)}</p>
      <p style="margin: 0 0 10px 0;"><strong>Score:</strong> ${escapeHtml(analysis.score.toFixed(2))}</p>
      <hr style="margin: 15px 0; border: none; border-top: 1px solid #eee;">
      <p style="margin: 0 0 10px 0;"><strong>Word Count:</strong> ${escapeHtml(analysis.wordCount)}</p>
      <p style="margin: 0 0 5px 0;"><strong>Breakdown:</strong></p>
      <ul style="margin: 0; padding-left: 20px;">
        <li>Positive Words: ${escapeHtml(analysis.summary.positive)}</li>
        <li>Negative Words: ${escapeHtml(analysis.summary.negative)}</li>
        <li>Neutral Words: ${escapeHtml(analysis.summary.neutral)}</li>
      </ul>
    `;

    overlay.appendChild(closeButton);
    overlay.appendChild(content);
    document.body.appendChild(overlay);
    return overlay;
  };

  return {
    analyzeText,
    extractPageContent,
    createResultsOverlay
  };
};

/**
 * Bookmarklet entry point: analyzes the current page with the default
 * lexicon and shows the result in an overlay.
 */
const analyzePage = () => {
  const analyzer = createWebSentimentAnalyzer();
  const content = analyzer.extractPageContent();
  const analysis = analyzer.analyzeText(content);
  analyzer.createResultsOverlay(analysis);
};