Special Character Classes Explained with Examples
1. [\\\^\-\]] – Escaped special characters in brackets
Description: Matches literal backslash, caret, hyphen, or closing bracket characters inside character classes
Example 1: Matching literal special characters
python
import re

# Inside a character class, backslash, caret, hyphen and closing bracket must be
# escaped to be taken literally. The opening bracket '[' is not part of this
# class, so the trailing '[' in the text is not returned.
text = "Special chars: \\ ^ - ] ["
result = re.findall(r'[\\\^\-\]]', text)
print(result)  # ['\\', '^', '-', ']']
# Matches literal \, ^, -, and ] characters
Example 2: Extracting file paths with backslashes
python
# A drive letter and colon, followed by a run of backslashes and word characters.
# The run stops at the first character outside the class: the comma after
# "System32" and the space inside "Program Files".
text = "Paths: C:\\Windows\\System32, /usr/bin/, D:\\Program Files\\"
result = re.findall(r'[A-Z]:[\\\w]+', text)
print(result)  # ['C:\\Windows\\System32', 'D:\\Program']
# Matches Windows paths with literal backslashes
Example 3: Finding mathematical ranges
python
# The escaped hyphen is a plain literal '-'. Each side of it is exactly ONE
# character, so "1-10" yields '1-1' and "20-30" yields '0-3'; append `+` to
# both classes if whole ranges such as '1-10' are wanted.
text = "Ranges: 1-10, 20-30, A-Z, 5-10, a-z"
result = re.findall(r'[A-Za-z0-9]\-[A-Za-z0-9]', text)
print(result)  # ['1-1', '0-3', 'A-Z', '5-1', 'a-z']
# Matches range patterns with literal hyphens
Example 4: Escaping regex metacharacters in search
python
# Escaped inside the class, ^ . * $ [ ] are matched as ordinary characters.
# Results come back in the order the characters appear in the text.
text = "Regex specials: [group], ^start, end$, .any, *star"
result = re.findall(r'[\^\.\*\$\[\]]', text)
print(result)  # ['[', ']', '^', '$', '.', '*']
# Matches literal regex metacharacters
2. [\n\t\r] – Common whitespace characters
Description: Matches newline, tab, or carriage return characters
Example 1: Finding all whitespace characters
python
# Class covering exactly newline, tab and carriage return (plain spaces are
# deliberately not part of it).
newline_tab_cr = re.compile(r'[\n\t\r]')

text = "Hello\tWorld\nHow are you?\rGoodbye"
result = newline_tab_cr.findall(text)
print(result)  # ['\t', '\n', '\r']

# One list entry per matched character, so the length is the match count.
match_total = len(result)
print("Whitespace count:", match_total)  # Whitespace count: 3
Example 2: Normalizing different line endings
python
text = "Line 1\r\nLine 2\nLine 3\rLine 4"
# Replace different line endings with Unix-style \n.
# NOTE: `[\r\n]+` treats any run of CR/LF characters as one line break, so
# consecutive blank lines are collapsed as well.
normalized = re.sub(r'[\r\n]+', '\n', text)
print(repr(normalized))  # 'Line 1\nLine 2\nLine 3\nLine 4'
Example 3: Counting indentation levels (tabs)
python
code = "def example():\n\tprint('Hello')\n\t\tprint('Indented')\n\treturn"

# Every tab character in the snippet: 1 + 2 + 1 across the three indented lines.
tabs = re.findall(r'\t', code)
print("Indentation levels found:", len(tabs))  # Indentation levels found: 4

# Per-line depth: with MULTILINE, ^ anchors at each line start, so every match
# is the run of leading tabs of one line.
depths = [len(run) for run in re.findall(r'^\t+', code, flags=re.MULTILINE)]
deepest = max(depths, default=0)
print("Deepest indentation level:", deepest)  # Deepest indentation level: 2
Example 4: Splitting on any whitespace including newlines
python
text = "Hello\tWorld\nHow are\ryou today?"
# `\s` already covers \n, \t and \r; they are listed explicitly only to show
# that escapes can be combined inside one class.
words = re.split(r'[\s\n\t\r]+', text)
print(words)  # ['Hello', 'World', 'How', 'are', 'you', 'today?']
# Splits on any whitespace character
3. [\x00-\x7F] – ASCII characters
Description: Matches any character in the ASCII range (0-127)
Example 1: Filter ASCII characters only
python
text = "Hello 世界! 123 ñ Café"
# Keep only code points 0-127; everything else is dropped, not replaced.
ascii_only = re.findall(r'[\x00-\x7F]', text)
print(''.join(ascii_only))  # "Hello ! 123  Caf"
# Removes non-ASCII characters (世界, ñ, é). The spaces on both sides of the
# removed 'ñ' survive, which is why two spaces appear before "Caf".
Example 2: Validate ASCII-only text
python
def is_ascii_only(text):
    """Return True when *text* has no character outside U+0000-U+007F.

    An empty string contains no offending character and therefore returns True.
    """
    # A single hit of the negated class is enough to reject the string.
    return not re.search(r'[^\x00-\x7F]', text)


print(is_ascii_only("Hello World"))  # True
print(is_ascii_only("Hello 世界"))  # False
print(is_ascii_only("Café"))  # False
print(is_ascii_only("123!@#"))  # True
Example 3: Remove control characters (non-printable ASCII)
python
text = "Hello\x00World\x07\x1BTest\nNormal"

# Printable ASCII runs from space (0x20, decimal 32) to tilde (0x7E, decimal 126);
# NUL, BEL, ESC and the newline in the sample all fall outside that range.
printable_char = re.compile(r'[\x20-\x7E]')
printable = [hit.group() for hit in printable_char.finditer(text)]

visible = ''.join(printable)
print(visible)  # "HelloWorldTestNormal"
Example 4: Extract ASCII strings from mixed content
python
text = "ASCII: Hello, Non-ASCII: 中文, Emoji: 😊, Numbers: 123"
# `+` groups consecutive printable ASCII characters into runs; every
# non-ASCII character ends the current run and is itself discarded.
ascii_parts = re.findall(r'[\x20-\x7E]+', text)
print(ascii_parts)  # ['ASCII: Hello, Non-ASCII: ', ', Emoji: ', ', Numbers: 123']
4. [\u0000-\uFFFF] – Unicode characters
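Description: Matches any character in the Basic Multilingual Plane (U+0000–U+FFFF, which holds the most common Unicode characters). Characters above U+FFFF, including most emoji, are not matched by this range.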
Example 1: Working with multilingual text
python
text = "Hello 世界! 🌍 Bonjour ñ Café 🎉"
all_chars = re.findall(r'[\u0000-\uFFFF]', text)
print(all_chars)
# ['H', 'e', 'l', 'l', 'o', ' ', '世', '界', '!', ' ', ' ', 'B', 'o', 'n', 'j', 'o', 'u', 'r', ' ', 'ñ', ' ', 'C', 'a', 'f', 'é', ' ']
# Matches every character up to U+FFFF (ASCII, CJK, accented Latin).
# The emoji 🌍 (U+1F30D) and 🎉 (U+1F389) lie above U+FFFF and are skipped.
Example 2: Finding specific Unicode ranges
python
text = "中文 Chinese, 日本語 Japanese, 한국어 Korean, English"
# Find CJK characters (approx range): U+4E00-U+9FFF holds the common Han
# ideographs. Korean Hangul syllables live in U+AC00-U+D7AF and are therefore
# NOT matched by this class.
cjk_chars = re.findall(r'[\u4E00-\u9FFF]', text)
print(''.join(cjk_chars))  # "中文日本語"
Example 3: Validating Unicode input
python
def contains_unicode(text):
    """Return True when *text* holds at least one non-ASCII character.

    "Unicode" here means "outside U+0000-U+007F"; pure ASCII text and the
    empty string both return False.
    """
    return bool(re.search(r'[^\u0000-\u007F]', text))


print(contains_unicode("ASCII only"))  # False
print(contains_unicode("Café"))  # True
print(contains_unicode("Hello 世界"))  # True
print(contains_unicode("123!@#"))  # False
Example 4: Extracting emojis and symbols
python
text = "I love Python! 🐍🚀 It's amazing! 💻✨ 🎯"
# Approximate emoji/symbol range.
# Most emoji live above U+FFFF (U+1F000 and up), so a class that stops at
# \uFFFF would only find '✨' (U+2728) here. The second range, written with
# the 8-digit \U escape, adds the supplementary-plane emoji blocks.
symbols = re.findall(r'[\u2000-\uFFFF\U0001F000-\U0001FAFF]', text)
print(symbols)  # ['🐍', '🚀', '💻', '✨', '🎯']
# Matches emojis and other symbols beyond basic ASCII
Bonus: Advanced Examples
Example: Mixed character class usage
python
text = "File: C:\\Users\\文档\\file.txt\nSize: 1.5MB\r\nUnicode: 中文 🎉"
# Extract different components
# Path: drive letter + colon, then backslashes / word characters / Han
# ideographs. The match stops at '.', so the extension is not included.
paths = re.findall(r'[A-Z]:[\\\w\u4E00-\u9FFF]+', text)
# Size: require a leading digit so that a bare ".txt" is not mistaken for a
# number followed by a unit (a plain `[\d.]+` would accept the lone dot).
sizes = re.findall(r'\d+(?:\.\d+)?[A-Za-z]+', text)
# Han ideographs plus the U+1F300-U+1F9FF emoji blocks, one character per match.
unicode_content = re.findall(r'[\u4E00-\u9FFF\U0001F300-\U0001F9FF]', text)
print("Paths:", paths)  # ['C:\\Users\\文档\\file']
print("Sizes:", sizes)  # ['1.5MB']
print("Unicode:", unicode_content)  # ['文', '档', '中', '文', '🎉']
Example: Cleaning text with multiple character classes
python
def clean_text(text):
    """Strip control characters and collapse whitespace in *text*.

    Steps, in order:
      1. delete ASCII control characters other than tab, newline and carriage
         return (vertical tab, form feed and DEL are deleted too);
      2. turn every run of newlines, tabs and carriage returns into one space;
      3. squeeze runs of spaces into a single space;
      4. trim leading and trailing whitespace.

    Non-ASCII characters are left untouched.
    """
    # Remove control characters but keep Unicode
    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text)
    # Normalize whitespace
    text = re.sub(r'[\n\t\r]+', ' ', text)
    # Remove excessive spaces
    text = re.sub(r' +', ' ', text)
    return text.strip()


dirty_text = "Hello\t\tWorld\n\n\nUnicode: 中文\r\x00Control chars"
clean = clean_text(dirty_text)
# Only the NUL byte itself is removed; the words after it are kept.
print(repr(clean))  # 'Hello World Unicode: 中文 Control chars'
Example: Password complexity checker
python
def check_password_complexity(password):
    """Report which character categories *password* contains.

    Returns a dict of booleans:
      has_upper   - at least one ASCII letter A-Z
      has_lower   - at least one ASCII letter a-z
      has_digit   - at least one digit 0-9
      has_special - at least one non-word character; `[^\\w]` counts
                    whitespace as special but not the underscore
      has_unicode - at least one character outside U+0000-U+007F
      is_strong   - upper, lower and digit all present and length >= 8
                    (has_special and has_unicode do not affect it)
    """
    has_upper = bool(re.search(r'[A-Z]', password))
    has_lower = bool(re.search(r'[a-z]', password))
    has_digit = bool(re.search(r'[0-9]', password))
    has_special = bool(re.search(r'[^\w]', password))
    has_unicode = bool(re.search(r'[^\x00-\x7F]', password))
    return {
        'has_upper': has_upper,
        'has_lower': has_lower,
        'has_digit': has_digit,
        'has_special': has_special,
        'has_unicode': has_unicode,
        'is_strong': has_upper and has_lower and has_digit and len(password) >= 8
    }


print(check_password_complexity("Pass123!"))
# {'has_upper': True, 'has_lower': True, 'has_digit': True, 'has_special': True, 'has_unicode': False, 'is_strong': True}
print(check_password_complexity("password"))
# {'has_upper': False, 'has_lower': True, 'has_digit': False, 'has_special': False, 'has_unicode': False, 'is_strong': False}
print(check_password_complexity("Pässwörd123!"))
# {'has_upper': True, 'has_lower': True, 'has_digit': True, 'has_special': True, 'has_unicode': True, 'is_strong': True}