9. Smart Defaults & Intelligent Extraction
9.1 Smart Defaults System Overview
Purpose: Automatically fill missing fields to enable seamless bulk upload processing with minimal user input
Default Values
# Values substituted for any field the uploaded data leaves out.
SMART_DEFAULTS = {
    # Indian Rupee (most common use case)
    'currency': 'INR',
    # Standard greedy algorithm
    'optimization_mode': 'greedy',
}
Implementation Philosophy
- ✓ Minimal User Input: User only needs to provide amount
- ✓ Intelligent Detection: Auto-detect currency/mode from context when possible
- ✓ Graceful Fallback: Use sensible defaults when detection fails
- ✓ Transparent Logging: Log all default applications for auditability
9.2 Implementation in OCR Processor
File: packages/local-backend/app/services/ocr_processor.py
9.2.1 Initialization with Defaults
class OCRProcessor:
    """OCR processor configured with smart defaults for currency and mode."""

    def __init__(self, default_currency: str = 'INR', default_mode: str = 'greedy'):
        """
        Initialize OCR processor with smart defaults.

        The defaults are normalized the same way parsed row values are
        (whitespace stripped, currency upper-cased, mode lower-cased) so that
        a defaulted row has the same format as a row with explicit values.
        A blank or None value falls back to the built-in default.

        Args:
            default_currency: Currency to use when not specified (default: INR)
            default_mode: Optimization mode when not specified (default: greedy)
        """
        self.default_currency = (default_currency or '').strip().upper() or 'INR'
        self.default_mode = (default_mode or '').strip().lower() or 'greedy'
        logger.info(
            f"Smart defaults configured: currency={self.default_currency}, mode={self.default_mode}"
        )
9.2.2 Application in CSV Parsing
def _parse_row(self, row: Dict, row_number: int) -> Dict:
    """
    Parse one CSV row, applying smart defaults for missing fields.

    Args:
        row: Mapping of column name to raw cell value. A missing key and a
            None cell are both treated as an empty cell.
        row_number: Position of the row, used in log and error messages.

    Returns:
        Dict with 'row_number', 'amount' (float), 'currency' (upper-cased),
        'optimization_mode' (lower-cased) and '_default_applied', a dict
        with boolean 'currency' / 'mode' flags telling whether each value
        came from the smart defaults (the key read by
        DefaultApplicationLogger.get_default_statistics).

    Raises:
        decimal.InvalidOperation: If the amount is missing, is not a valid
            number, or is not finite (NaN / Infinity).
    """
    from decimal import InvalidOperation

    def _cell(key: str) -> str:
        # Missing keys and None cells both become empty text instead of
        # crashing on None.strip().
        value = row.get(key)
        return '' if value is None else str(value).strip()

    # Amount is required
    amount_text = _cell('amount').replace(',', '')
    if not amount_text:
        raise InvalidOperation(f"Row {row_number}: amount is required")
    try:
        amount = Decimal(amount_text)
    except InvalidOperation as exc:
        raise InvalidOperation(
            f"Row {row_number}: invalid amount {amount_text!r}"
        ) from exc
    if not amount.is_finite():
        # Decimal accepts 'NaN' and 'Infinity'; neither is a usable amount.
        raise InvalidOperation(
            f"Row {row_number}: invalid amount {amount_text!r}"
        )

    # Currency - smart default
    currency = _cell('currency').upper()
    currency_defaulted = not currency
    if currency_defaulted:
        currency = self.default_currency
        logger.info(f"Row {row_number}: No currency specified, using default {currency}")

    # Mode - smart default
    mode = _cell('mode').lower()
    mode_defaulted = not mode
    if mode_defaulted:
        mode = self.default_mode
        logger.info(f"Row {row_number}: No mode specified, using default {mode}")

    return {
        'row_number': row_number,
        'amount': float(amount),
        'currency': currency,
        'optimization_mode': mode,
        # Lets audit statistics tell a defaulted value from an explicit one
        # that merely equals the default.
        '_default_applied': {
            'currency': currency_defaulted,
            'mode': mode_defaulted,
        },
    }
9.2.3 Application in Text Parsing
def _parse_numbers_fallback(self, text: str) -> List[Dict[str, Any]]:
    """
    Extract every number in the text as an amount, with smart defaults.

    When only numbers are found (no currency/mode info):
    - Currency defaults to self.default_currency
    - Mode defaults to self.default_mode

    Recognized number forms: plain digits ("10000"), comma-grouped digits
    in western ("2,500") or Indian ("1,00,000") grouping, each with an
    optional decimal part.

    Args:
        text: Free text to scan for amounts.

    Returns:
        One dict per number found, in order of appearance, with
        'row_number' (1-based position among the matches), 'amount'
        (float), 'currency', 'optimization_mode' and '_default_applied'
        (both flags True, because both values always come from the
        defaults here).
    """
    from decimal import InvalidOperation

    # The grouped form must come first AND must require at least one comma
    # group; otherwise the alternation matches the first three digits of a
    # plain number and splits "10000" into "100" and "00".
    number_pattern = (
        r'\d{1,3}(?:,\d{2,3})*,\d{3}(?!\d)(?:\.\d+)?'
        r'|\d+(?:\.\d+)?'
    )

    results = []
    for row_num, num_str in enumerate(re.findall(number_pattern, text), start=1):
        try:
            amount = Decimal(num_str.replace(',', ''))
        except InvalidOperation:
            # Not expected for text matched by the pattern; skip the token
            # rather than abort the whole extraction.
            logger.debug(f"Skipping unparseable number {num_str!r}")
            continue
        # Apply smart defaults
        results.append({
            'row_number': row_num,
            'amount': float(amount),
            'currency': self.default_currency,  # Smart default
            'optimization_mode': self.default_mode,  # Smart default
            '_default_applied': {'currency': True, 'mode': True},
        })
    logger.info(f"Applied smart defaults to {len(results)} extracted amounts")
    return results
9.3 Intelligent Currency Detection
Priority Order:
- Explicit Column/Field: If CSV has 'currency' column or text has "Currency: USD"
- Symbol Detection: Detect ₹, Rs, $, €, £ in the text
- Code Detection: Detect INR, USD, EUR, GBP keywords
- Name Detection: Detect "Rupee", "Dollar", "Euro", "Pound"
- Smart Default: Fallback to INR
Implementation
def _detect_currency_in_text(self, text: str) -> str:
    """
    Intelligent currency detection with fallback.

    Detection strategies (in order):
    1. Currency symbols: ₹, Rs, $, €, £
    2. Currency codes: INR, USD, EUR, GBP
    3. Currency names: Rupee, Dollar, Euro, Pound (singular or plural)
    4. Default: self.default_currency (smart default)

    Alphabetic markers (Rs, codes, names) match case-insensitively and only
    as whole tokens: they may touch digits or punctuation ("Rs2500", "Rs.")
    but not other letters, so "dollars" or "users" no longer count as "Rs"
    and "compound" no longer counts as "Pound".

    Args:
        text: Text to inspect.

    Returns:
        'INR', 'USD', 'EUR' or 'GBP' when a marker is found, otherwise
        self.default_currency.
    """
    import re

    text_upper = text.upper()

    def _has_token(token: str, allow_plural: bool = False) -> bool:
        # Whole-token search on the upper-cased text; neighbours must not
        # be letters.
        plural = 'S?' if allow_plural else ''
        pattern = rf'(?<![A-Z]){re.escape(token)}{plural}(?![A-Z])'
        return re.search(pattern, text_upper) is not None

    # Strategy 1: Symbols (third field: True when the marker is alphabetic
    # and therefore needs whole-token matching)
    symbol_checks = (
        ('₹', 'INR', False),
        ('RS', 'INR', True),
        ('$', 'USD', False),
        ('€', 'EUR', False),
        ('£', 'GBP', False),
    )
    for symbol, currency, is_word in symbol_checks:
        found = _has_token(symbol) if is_word else symbol in text
        if found:
            logger.debug(f"Detected currency {currency} from symbol '{symbol}'")
            return currency

    # Strategy 2: Currency codes
    for code in ('INR', 'USD', 'EUR', 'GBP'):
        if _has_token(code):
            return code

    # Strategy 3: Currency names
    name_checks = (
        ('RUPEE', 'INR'),
        ('INDIAN', 'INR'),
        ('DOLLAR', 'USD'),
        ('EURO', 'EUR'),
        ('POUND', 'GBP'),
        ('STERLING', 'GBP'),
    )
    for name, currency in name_checks:
        if _has_token(name, allow_plural=True):
            logger.debug(f"Detected currency {currency} from name '{name}'")
            return currency

    # Strategy 4: Smart default
    logger.info(f"No currency detected, using smart default: {self.default_currency}")
    return self.default_currency
Detection Examples
| Input Text | Detected Currency | Strategy Used |
|---|---|---|
| "1000 ₹" | INR | Symbol Detection |
| "Rs 2500" | INR | Symbol Detection |
| "$500" | USD | Symbol Detection |
| "1000 USD" | USD | Code Detection |
| "500 Rupees" | INR | Name Detection |
| "1000" | INR | Smart Default |
9.4 Intelligent Mode Detection
Priority Order:
- Explicit Field: If present in data
- Keyword Detection: Detect mode keywords in text
- Smart Default: Fallback to greedy
Keyword Mapping
# Keywords that select each optimization mode. Modes are tried in the order
# listed here, and keywords in the order listed within each mode.
MODE_KEYWORDS = {
    'greedy': [
        'greedy',
        'standard',
        'default',
        'normal',
        'quick',
        'fast',
        'standard algorithm',
    ],
    'balanced': [
        'balanced',
        'mixed',
        'even',
        'equal',
        'balance',
        'moderate',
    ],
    'minimize_large': [
        'minimize large',
        'min large',
        'fewer notes',
        'less notes',
        'reduce notes',
        'minimize bills',
    ],
    'minimize_small': [
        'minimize small',
        'min small',
        'fewer coins',
        'less coins',
        'reduce coins',
        'minimize change',
    ],
}
def _detect_mode_in_text(self, text: str) -> str:
    """
    Intelligent mode detection with fallback.

    Detection strategies:
    1. Keyword matching (case-insensitive) against MODE_KEYWORDS, trying
       modes and keywords in their listed order
    2. Default: self.default_mode (smart default)

    Keywords match only as whole tokens (not inside a longer word, so
    "seven" is not "even" and "breakfast" is not "fast"). The words of a
    multi-word keyword may be separated by whitespace, '_' or '-', which
    also lets literal mode names such as "minimize_small" be recognized.

    Args:
        text: Text to inspect.

    Returns:
        The first mode with a matching keyword, otherwise self.default_mode.
    """
    import re

    text_lower = text.lower()
    # Check all mode keywords
    for mode, keywords in MODE_KEYWORDS.items():
        for keyword in keywords:
            words = r'[\s_-]+'.join(re.escape(part) for part in keyword.split())
            # Letters on either side would mean the keyword is only a
            # fragment of a longer word.
            if re.search(rf'(?<![a-z]){words}(?![a-z])', text_lower):
                logger.debug(f"Detected mode '{mode}' from keyword '{keyword}'")
                return mode
    # Smart default
    logger.info(f"No mode detected, using smart default: {self.default_mode}")
    return self.default_mode
Detection Examples
| Input Text | Detected Mode | Keyword Matched |
|---|---|---|
| "1000 balanced" | balanced | "balanced" |
| "Calculate with fewer notes" | minimize_large | "fewer notes" |
| "Use greedy algorithm" | greedy | "greedy" |
| "1000" | greedy | (smart default) |
9.5 Smart Defaults in API Layer
Bulk Upload Endpoint with Configurable Defaults
@router.post("/bulk/upload")
async def bulk_upload(
    file: UploadFile = File(...),
    save_to_history: bool = Form(default=True),
    # Optional: Override smart defaults
    default_currency: str = Form(default='INR'),
    default_mode: str = Form(default='greedy'),
    db: Session = Depends(get_db)
):
    """
    Upload bulk file with configurable smart defaults.

    Smart defaults are applied when fields are missing:
    - currency: Default to INR (or custom default)
    - mode: Default to greedy (or custom default)

    Args:
        file: Uploaded file to extract amounts from.
        save_to_history: Form flag; not used in the portion shown here.
        default_currency: Currency applied to rows that do not specify one.
        default_mode: Optimization mode applied to rows that do not specify one.
        db: Database session dependency; not used in the portion shown here.
    """
    # Initialize processor with custom defaults
    processor = OCRProcessor(
        default_currency=default_currency,
        default_mode=default_mode
    )
    # Read the upload; the original passed an undefined `file_data` name.
    file_data = await file.read()
    # Process file (will apply defaults as needed)
    extracted_data = processor.process_file(file_data, file.filename)
    # Log default usage statistics.
    # This counts every row whose currency equals the configured default,
    # including rows that stated that currency explicitly. The comparison
    # uses the processor's stored value so it agrees with the rows it produced.
    default_currency_count = sum(
        1 for row in extracted_data
        if row.get('currency') == processor.default_currency
    )
    logger.info(f"Smart defaults applied: {default_currency_count}/{len(extracted_data)} rows used default currency")
    # Continue with calculations...
9.6 User-Visible Default Application
UI Indicator in Results
File: packages/desktop-app/src/pages/BulkUploadPage.tsx
// Show which defaults were applied in results
{result.applied_defaults && (
{result.applied_defaults.currency && `Currency: ${result.currency} (default)`}
{result.applied_defaults.mode && `, Mode: ${result.mode} (default)`}
)}
Results Table with Badges
{result.currency}
{result.applied_defaults?.currency && (
default
)}
{result.mode}
{result.applied_defaults?.mode && (
default
)}
9.7 Smart Defaults Configuration
Settings Storage (Future Enhancement)
class Settings(Base):
    """Model mapped to the "settings" table, holding smart-default preferences."""
    # NOTE(review): no primary-key column is visible in this excerpt. If Base is
    # a SQLAlchemy declarative base (presumably), the full model must define
    # one — confirm.
    __tablename__ = "settings"
    # User-configurable smart defaults
    # Currency used when a row does not specify one (up to 3 characters).
    smart_default_currency = Column(String(3), default="INR")
    # Optimization mode used when a row does not specify one (up to 20 characters).
    smart_default_mode = Column(String(20), default="greedy")
    # Smart detection preferences
    # Flags for currency / mode auto-detection; both default to enabled.
    enable_currency_detection = Column(Boolean, default=True)
    enable_mode_detection = Column(Boolean, default=True)
API to Update Defaults
@router.put("/settings/smart-defaults")
async def update_smart_defaults(
    currency: Optional[str] = Body(None),
    mode: Optional[str] = Body(None),
    db: Session = Depends(get_db)
):
    """
    Update smart default preferences.

    Args:
        currency: New default currency; ignored when omitted or blank.
            Stored stripped and upper-cased, matching how row currencies
            are normalized during parsing.
        mode: New default optimization mode; ignored when omitted or blank.
            Stored stripped and lower-cased.
        db: Database session dependency.

    Returns:
        Dict with a confirmation "message" and the updated "settings" row.
    """
    settings = db.query(Settings).first()
    if settings is None:
        # No settings row exists yet: create one rather than fail with an
        # AttributeError on None below.
        settings = Settings()
        db.add(settings)

    currency = (currency or '').strip().upper()
    if currency:
        settings.smart_default_currency = currency
    mode = (mode or '').strip().lower()
    if mode:
        settings.smart_default_mode = mode

    db.commit()
    # NOTE(review): with SQLAlchemy's default expire_on_commit=True the
    # instance's attributes are expired by commit(); reload them so the
    # returned object carries the stored values — confirm session config.
    db.refresh(settings)
    return {"message": "Smart defaults updated", "settings": settings}
9.8 Validation of Defaults
Ensure Defaults Are Valid
def validate_smart_defaults(currency: str, mode: str) -> None:
    """
    Check that a pair of smart default values is supported.

    The currency is checked before the mode, so when both are invalid the
    currency error is the one raised. A successful validation is logged.

    Args:
        currency: Default currency code; must be INR, USD, EUR or GBP.
        mode: Default optimization mode; must be greedy, balanced,
            minimize_large or minimize_small.

    Raises:
        ValueError: If either value is not in its supported set.
    """
    supported_modes = {'greedy', 'balanced', 'minimize_large', 'minimize_small'}
    supported_currencies = {'INR', 'USD', 'EUR', 'GBP'}

    currency_ok = currency in supported_currencies
    if not currency_ok:
        raise ValueError(f"Invalid default currency: {currency}")

    mode_ok = mode in supported_modes
    if not mode_ok:
        raise ValueError(f"Invalid default mode: {mode}")

    logger.info(f"Smart defaults validated: currency={currency}, mode={mode}")
9.9 Smart Defaults Test Cases
Test Ultra-Minimal CSV
def test_ultra_minimal_csv():
    """An amount-only CSV yields one row per amount, each fully defaulted."""
    # Continuation lines stay at column 0 so the CSV text itself is unchanged.
    csv_content = """amount
1000
2500
5000"""
    parser = OCRProcessor(default_currency='INR', default_mode='greedy')
    parsed_rows = parser._parse_csv_text(csv_content)

    # One result per data line...
    assert len(parsed_rows) == 3
    # ...and every one of them carries the defaults.
    assert {entry['currency'] for entry in parsed_rows} == {'INR'}
    assert {entry['optimization_mode'] for entry in parsed_rows} == {'greedy'}
Test Mixed Detection
def test_mixed_currency_detection():
    """Lines with an explicit currency keep it; a bare amount gets the default."""
    # Continuation lines stay at column 0 so the input text itself is unchanged.
    text = """
1000 INR
2500
500 USD
"""
    parser = OCRProcessor(default_currency='INR', default_mode='greedy')
    parsed = parser._parse_list_text(text)

    first_three = [entry['currency'] for entry in parsed[:3]]
    # Explicit INR, defaulted INR, explicit USD
    assert first_three == ['INR', 'INR', 'USD']
9.10 Logging & Audit Trail
Default Application Logging
class DefaultApplicationLogger:
    """Audit helpers that record and summarize smart-default usage."""

    @staticmethod
    def log_default_applied(row_number: int, field: str, value: str, reason: str):
        """Write an info-level log line saying a default was applied to a row."""
        message = f"Row {row_number}: Applied default {field}='{value}' ({reason})"
        logger.info(message)

    @staticmethod
    def get_default_statistics(results: List[Dict]) -> Dict:
        """
        Summarize how often defaults were applied across parsed rows.

        A row counts as defaulted for a field when its '_default_applied'
        dict holds a truthy value for that field ('currency' or 'mode');
        rows without that dict count as not defaulted. Detection rates are
        the fraction of rows that were not defaulted, or 0 when there are
        no rows.
        """
        row_count = len(results)

        def _defaulted(field_name):
            # Number of rows flagged as having used the default for this field.
            flagged = [
                entry for entry in results
                if entry.get('_default_applied', {}).get(field_name, False)
            ]
            return len(flagged)

        def _detected_share(defaulted_rows):
            # Guard the division for an empty result list.
            if row_count > 0:
                return (row_count - defaulted_rows) / row_count
            return 0

        currency_defaults = _defaulted('currency')
        mode_defaults = _defaulted('mode')

        summary = {}
        summary['total_rows'] = row_count
        summary['default_currency_applied'] = currency_defaults
        summary['default_mode_applied'] = mode_defaults
        summary['currency_detection_rate'] = _detected_share(currency_defaults)
        summary['mode_detection_rate'] = _detected_share(mode_defaults)
        return summary
Statistics Example
Sample Audit Report
{
"total_rows": 100,
"default_currency_applied": 25,
"default_mode_applied": 80,
"currency_detection_rate": 0.75,
"mode_detection_rate": 0.20
}
Interpretation: the currency was detected (rather than defaulted) for 75% of rows, and the mode was detected for 20% of rows.
✓ Section Complete
This section covers the complete Smart Defaults system, including intelligent currency detection (4 strategies), mode detection (keyword mapping), configurable defaults, UI indicators, validation, testing, and audit logging.