From 6fe6868793ed1c0c6859ffaabe44e015bea1f73f Mon Sep 17 00:00:00 2001 From: "Dimas D. Angga" Date: Mon, 30 Mar 2026 09:48:04 +0700 Subject: [PATCH] Add AI Text Summarizer CLI tool - Simple command-line text file summarizer - Extracts first N sentences from text files - Includes error handling and statistics - No external dependencies required - Closes #560 --- AI Text Summarizer/README.md | 128 +++++++++++++++++++++++++ AI Text Summarizer/main.py | 172 ++++++++++++++++++++++++++++++++++ AI Text Summarizer/sample.txt | 7 ++ 3 files changed, 307 insertions(+) create mode 100644 AI Text Summarizer/README.md create mode 100644 AI Text Summarizer/main.py create mode 100644 AI Text Summarizer/sample.txt diff --git a/AI Text Summarizer/README.md b/AI Text Summarizer/README.md new file mode 100644 index 00000000..ac827a73 --- /dev/null +++ b/AI Text Summarizer/README.md @@ -0,0 +1,128 @@ +# AI Text Summarizer + +## Overview + +A simple, beginner-friendly command-line tool that summarizes text files by extracting the most important sentences. This tool helps you quickly understand the main points of large text documents without reading the entire content. + +## Features + +- **Easy to Use**: Simple command-line interface with clear instructions +- **Customizable**: Choose how many sentences you want in your summary +- **Smart Extraction**: Automatically identifies and extracts key sentences +- **Error Handling**: Gracefully handles missing files, empty files, and permission errors +- **Clean Output**: Displays summary with statistics (original length, summary length, reduction percentage) +- **No Dependencies**: Uses only Python standard library - no external packages required! + +## Prerequisites + +- Python 3.6 or higher + +## Installation + +1. Clone or download this repository +2. Navigate to the `AI Text Summarizer` folder +3. No additional packages to install! 
+ +## Usage + +### Basic Usage + +Summarize a text file with default settings (3 sentences): + +```bash +python main.py yourfile.txt +``` + +### Custom Number of Sentences + +Specify how many sentences you want in the summary: + +```bash +python main.py yourfile.txt --sentences 5 +``` + +Or use the short form: + +```bash +python main.py yourfile.txt -s 2 +``` + +### Examples + +```bash +# Summarize an article with 3 sentences +python main.py article.txt + +# Summarize a story with 5 sentences +python main.py story.txt --sentences 5 + +# Quick summary with just 1 sentence +python main.py document.txt -s 1 +``` + +## How It Works + +1. **File Reading**: The tool reads your text file using UTF-8 encoding +2. **Text Processing**: Removes extra whitespace and normalizes the text +3. **Sentence Extraction**: Intelligently splits the text into sentences using punctuation marks (., !, ?) +4. **Summary Generation**: Selects the first N sentences based on your preference +5. **Output Display**: Shows the summary with helpful statistics + +## Example Output + +``` +====================================================================== +TEXT SUMMARY +====================================================================== + +This is the first sentence of the document. This is the second +sentence with important information. Here is the third sentence +that concludes the main idea. 
+ +---------------------------------------------------------------------- +Original length: 1247 characters +Summary length: 156 characters +Reduction: 87.5% +====================================================================== +``` + +## Error Handling + +The tool handles common errors gracefully: + +- **File not found**: Clear message indicating the file doesn't exist +- **Empty file**: Informs you when the file is empty or contains only whitespace +- **Permission denied**: Alerts you if the file can't be read due to permissions +- **Invalid arguments**: Validates that the number of sentences is at least 1 + +## Sample Test + +Create a test file to try the summarizer: + +```bash +echo "Python is a high-level programming language. It is known for its simplicity and readability. Python is widely used in web development, data science, and automation. The language was created by Guido van Rossum in 1991. Today, Python is one of the most popular programming languages in the world." > test.txt + +python main.py test.txt +``` + +## Future Enhancements + +Potential improvements for future versions: + +- AI-powered summarization using NLP techniques +- Support for multiple file formats (PDF, DOCX, etc.) +- Keyword extraction +- Summary quality scoring +- Export summary to file + +## Contributing + +Feel free to fork this project and submit pull requests for any improvements! + +## License + +This project is open source and available for educational purposes. + +## Author + +Created by **Dimas D. Angga** as a contribution to the Python-Scripts repository. diff --git a/AI Text Summarizer/main.py b/AI Text Summarizer/main.py new file mode 100644 index 00000000..ef9ea267 --- /dev/null +++ b/AI Text Summarizer/main.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +""" +AI Text Summarizer +A simple command-line tool to summarize text files. +Author: Dimas D. 
import argparse
import os
import re
import sys


# A sentence boundary is whitespace that directly follows terminal punctuation,
# optionally closed by a quote or bracket. Requiring the whitespace keeps
# decimals ("3.14"), version numbers ("v2.0") and dotted names ("main.py")
# in one piece. Abbreviations such as "e.g. " are still treated as boundaries.
_SENTENCE_BOUNDARY = re.compile(r"""(?<=[.!?])\s+|(?<=[.!?]["')\]])\s+""")


def read_file(file_path: str) -> str:
    """
    Reads the content of a text file as UTF-8.

    Args:
        file_path (str): Path to the text file

    Returns:
        str: Content of the file

    Raises:
        FileNotFoundError: If the file doesn't exist
        PermissionError: If the file can't be read
        RuntimeError: If reading fails for another I/O or decoding reason
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError as e:
        # Re-raise with a user-facing message; keep the cause for debugging.
        raise FileNotFoundError(f"Error: File '{file_path}' not found.") from e
    except PermissionError as e:
        raise PermissionError(f"Error: Permission denied to read '{file_path}'.") from e
    except (OSError, UnicodeError) as e:
        # Covers e.g. directories passed as files and non-UTF-8 content.
        raise RuntimeError(f"Error reading file: {e}") from e


def summarize_text(text: str, num_sentences: int = 3) -> str:
    """
    Creates a simple summary by extracting the first few sentences.

    Whitespace (including newlines) is collapsed to single spaces before the
    text is split. A sentence ends at '.', '!' or '?' (optionally followed by
    a closing quote or bracket) only when that is followed by whitespace or
    the end of the text. Text without any such boundary is one sentence.

    Args:
        text (str): The text to summarize
        num_sentences (int): Number of sentences to include in the summary;
            values below zero are treated as zero

    Returns:
        str: The first `num_sentences` sentences joined by single spaces, or
            an "Error: ..." message if the text is empty or whitespace-only
    """
    # Remove extra whitespace and newlines
    text = ' '.join(text.split())

    # Kept as a returned message (not an exception) for existing callers.
    if not text:
        return "Error: The file is empty or contains only whitespace."

    # Normalised text has no leading/trailing whitespace, so the split never
    # yields empty strings and always yields at least one sentence.
    sentences = _SENTENCE_BOUNDARY.split(text)

    # Clamp so a negative count cannot slice from the end of the list.
    return ' '.join(sentences[:max(num_sentences, 0)])


def format_output(summary: str, original_length: int, summary_length: int) -> None:
    """
    Prints the summary and its size statistics in a user-friendly way.

    Args:
        summary (str): The summarized text
        original_length (int): Character count of original text
        summary_length (int): Character count of summary
    """
    rule = "=" * 70

    # Guard the division: an empty original has no meaningful reduction.
    if original_length > 0:
        reduction = (original_length - summary_length) / original_length * 100
    else:
        reduction = 0

    print("\n" + rule)
    print("TEXT SUMMARY")
    print(rule)
    print(f"\n{summary}\n")
    print("-" * 70)
    print(f"Original length: {original_length} characters")
    print(f"Summary length: {summary_length} characters")
    print(f"Reduction: {reduction:.1f}%")
    print(rule + "\n")


def main():
    """
    Main function to handle command-line arguments and orchestrate the summarization.

    Exits with status 1 (message on stderr) for an invalid sentence count,
    an unreadable file, or a file that is empty or whitespace-only.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description='Summarize text files by extracting key sentences.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py document.txt
  python main.py article.txt --sentences 5
  python main.py story.txt -s 2
        """
    )

    parser.add_argument(
        'file',
        type=str,
        help='Path to the text file to summarize'
    )

    parser.add_argument(
        '-s', '--sentences',
        type=int,
        default=3,
        help='Number of sentences to include in summary (default: 3)'
    )

    # Parse arguments
    args = parser.parse_args()

    # Validate number of sentences
    if args.sentences < 1:
        print("Error: Number of sentences must be at least 1.", file=sys.stderr)
        sys.exit(1)

    try:
        # Read the file
        print(f"\nReading file: {args.file}...")
        text = read_file(args.file)

        # Report an empty file as an error instead of printing the error
        # sentence as if it were a summary with statistics.
        if not text.strip():
            print("\nError: The file is empty or contains only whitespace.\n",
                  file=sys.stderr)
            sys.exit(1)

        # Generate summary
        print("Generating summary...\n")
        summary = summarize_text(text, args.sentences)

        # Display results
        format_output(summary, len(text), len(summary))

    except FileNotFoundError as e:
        print(f"\n{e}", file=sys.stderr)
        print("Please check the file path and try again.\n", file=sys.stderr)
        sys.exit(1)
    except PermissionError as e:
        print(f"\n{e}", file=sys.stderr)
        print("Please check file permissions and try again.\n", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero.
        print(f"\nAn unexpected error occurred: {e}\n", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
The language's philosophy is summarized in "The Zen of Python," which emphasizes principles like simplicity, readability, and the idea that there should be one obvious way to do things. This philosophy has helped Python maintain its popularity and relevance in an ever-changing technology landscape.