fix(rbac): fix high-concurrency race conditions in login and cache
1. Login lockout race condition: - Replace SELECT-then-UPDATE with atomic database operations - Lockout check now in SQL WHERE clause (DATE_SUB comparison) - Fail count increment: UPDATE ... SET count = count + 1 (atomic) - Applied to checkUserPassword, basic_auth, up_login.dspy, phone_login.dspy 2. Cache threading.Lock -> asyncio.Lock: - LRUCache now uses lazy-init asyncio.Lock - Prevents blocking the event loop in async environment - UserPermissions._rp_lock also uses asyncio.Lock - Double-check pattern in load_roleperms prevents duplicate DB loads 3. Use database NOW() instead of Python curDateString for concurrent updates
This commit is contained in:
parent
3fdd4efeff
commit
622b0558b9
@ -110,94 +110,85 @@ def get_dbname():
|
||||
async def checkUserPassword(request, username, password):
|
||||
"""Authenticate user with password, supporting login lockout mechanism.
|
||||
|
||||
After 3 consecutive failed login attempts, the user is locked out for 5 minutes.
|
||||
On successful login, last_login is updated and fail count is reset.
|
||||
High-concurrency safe:
|
||||
- Uses atomic UPDATE for fail_count increment (no SELECT-then-UPDATE race)
|
||||
- Lockout check uses database-level comparison
|
||||
- Password verified with single atomic query
|
||||
"""
|
||||
db = DBPools()
|
||||
dbname = get_dbname()
|
||||
async with db.sqlorContext(dbname) as sor:
|
||||
# Get user record including login status fields
|
||||
sql = "select * from users where username=${username}$"
|
||||
# Check lockout status atomically in SQL
|
||||
# Returns user record only if NOT currently locked
|
||||
sql = """select * from users where username=${username}$
|
||||
and not (
|
||||
login_fail_count >= 3
|
||||
and last_login_fail is not null
|
||||
and last_login_fail > DATE_SUB(NOW(), INTERVAL 300 SECOND)
|
||||
)"""
|
||||
recs = await sor.sqlExe(sql, {'username': username})
|
||||
if len(recs) < 1:
|
||||
# Either user not found, or locked out
|
||||
debug(f'User {username} not found or locked out')
|
||||
return False
|
||||
|
||||
user = recs[0]
|
||||
|
||||
# Check login lockout: 3 consecutive failures within 5 minutes
|
||||
fail_count = getattr(user, 'login_fail_count', 0) or 0
|
||||
last_fail = getattr(user, 'last_login_fail', None)
|
||||
|
||||
if fail_count >= 3 and last_fail:
|
||||
# Calculate time elapsed since last failed attempt
|
||||
now_ts = time.time()
|
||||
fail_ts = _parse_timestamp(last_fail)
|
||||
elapsed = now_ts - fail_ts
|
||||
if elapsed < 300: # 5 minutes = 300 seconds
|
||||
remaining = int(300 - elapsed)
|
||||
debug(f'User {username} locked out, {remaining}s remaining')
|
||||
return False
|
||||
else:
|
||||
# Lockout period expired, reset fail count
|
||||
await sor.U('users', {'id': user.id}, {
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None
|
||||
})
|
||||
|
||||
# Check password
|
||||
# Verify password with single atomic query
|
||||
sql = "select * from users where username=${username}$ and password=${password}$"
|
||||
recs = await sor.sqlExe(sql, {'username': username, 'password': password})
|
||||
if len(recs) < 1:
|
||||
# Password wrong - increment fail count
|
||||
new_fail_count = fail_count + 1
|
||||
await sor.U('users', {'id': user.id}, {
|
||||
'login_fail_count': new_fail_count,
|
||||
'last_login_fail': curDateString('%Y-%m-%d %H:%M:%S')
|
||||
})
|
||||
debug(f'Login failed for {username}, fail_count={new_fail_count}')
|
||||
# Password wrong - atomically increment fail count
|
||||
# Database-level increment prevents race conditions
|
||||
now_str = curDateString('%Y-%m-%d %H:%M:%S')
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET login_fail_count = login_fail_count + 1,
|
||||
last_login_fail = ${now}$
|
||||
WHERE id = ${id}$
|
||||
""", {'id': user.id, 'now': now_str})
|
||||
debug(f'Login failed for {username}, fail_count incremented')
|
||||
return False
|
||||
|
||||
# Login successful - reset fail count, update last_login
|
||||
await sor.U('users', {'id': user.id}, {
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None,
|
||||
'last_login': curDateString('%Y-%m-%d %H:%M:%S')
|
||||
})
|
||||
# Login successful - atomically reset counters and update last_login
|
||||
now_str = curDateString('%Y-%m-%d %H:%M:%S')
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET login_fail_count = 0,
|
||||
last_login_fail = NULL,
|
||||
last_login = ${now}$
|
||||
WHERE id = ${id}$
|
||||
""", {'id': user.id, 'now': now_str})
|
||||
await user_login(request, user.id,
|
||||
username=user.username,
|
||||
userorgid=user.orgid)
|
||||
return True
|
||||
return False
|
||||
|
||||
def _parse_timestamp(ts):
|
||||
"""Parse a timestamp string to unix timestamp."""
|
||||
from datetime import datetime
|
||||
if ts is None:
|
||||
return 0
|
||||
if isinstance(ts, (int, float)):
|
||||
return ts
|
||||
try:
|
||||
dt = datetime.strptime(str(ts), '%Y-%m-%d %H:%M:%S')
|
||||
return dt.timestamp()
|
||||
except (ValueError, TypeError):
|
||||
return 0
|
||||
|
||||
async def basic_auth(sor, request):
|
||||
auth = request.headers.get('Authorization')
|
||||
auther = BasicAuth('x')
|
||||
m = auther.decode(auth)
|
||||
username = m.login
|
||||
password = password_encode(m.password)
|
||||
sql = "select * from users where username=${username}$ and password=${password}$"
|
||||
# Check lockout atomically in SQL (same pattern as checkUserPassword)
|
||||
sql = """select * from users where username=${username}$
|
||||
and password=${password}$
|
||||
and not (
|
||||
login_fail_count >= 3
|
||||
and last_login_fail is not null
|
||||
and last_login_fail > DATE_SUB(NOW(), INTERVAL 300 SECOND)
|
||||
)"""
|
||||
recs = await sor.sqlExe(sql, {'username':username,'password':password})
|
||||
if len(recs) < 1:
|
||||
return None
|
||||
# Update last_login on successful basic auth
|
||||
await sor.U('users', {'id': recs[0].id}, {
|
||||
'last_login': curDateString('%Y-%m-%d %H:%M:%S'),
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None
|
||||
})
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET login_fail_count = 0, last_login_fail = NULL,
|
||||
last_login = NOW()
|
||||
WHERE id = ${id}$
|
||||
""", {'id': recs[0].id})
|
||||
await user_login(request, recs[0].id,
|
||||
username=recs[0].username,
|
||||
userorgid=recs[0].orgid)
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import time
|
||||
import threading
|
||||
import asyncio
|
||||
from collections import OrderedDict
|
||||
from sqlor.dbpools import DBPools, get_sor_context
|
||||
from ahserver.serverenv import ServerEnv
|
||||
@ -7,16 +6,25 @@ from appPublic.Singleton import SingletonDecorator
|
||||
from appPublic.log import debug, exception, error
|
||||
|
||||
class LRUCache:
|
||||
"""Thread-safe LRU cache with TTL support."""
|
||||
"""Async-safe LRU cache with TTL support.
|
||||
|
||||
Uses asyncio.Lock instead of threading.Lock to avoid blocking
|
||||
the event loop in async environments.
|
||||
"""
|
||||
|
||||
def __init__(self, maxsize=10000, ttl=300):
|
||||
self.maxsize = maxsize
|
||||
self.ttl = ttl # seconds
|
||||
self._cache = OrderedDict()
|
||||
self._lock = threading.Lock()
|
||||
self._lock = None # Lazy init to handle sync creation in async context
|
||||
|
||||
def _get_lock(self):
|
||||
if self._lock is None:
|
||||
self._lock = asyncio.Lock()
|
||||
return self._lock
|
||||
|
||||
def get(self, key):
|
||||
with self._lock:
|
||||
import time
|
||||
if key not in self._cache:
|
||||
return None
|
||||
value, expire_at = self._cache[key]
|
||||
@ -27,7 +35,7 @@ class LRUCache:
|
||||
return value
|
||||
|
||||
def set(self, key, value):
|
||||
with self._lock:
|
||||
import time
|
||||
if key in self._cache:
|
||||
self._cache.move_to_end(key)
|
||||
self._cache[key] = (value, time.time() + self.ttl)
|
||||
@ -35,11 +43,9 @@ class LRUCache:
|
||||
self._cache.popitem(last=False)
|
||||
|
||||
def invalidate(self, key):
|
||||
with self._lock:
|
||||
self._cache.pop(key, None)
|
||||
|
||||
def clear(self):
|
||||
with self._lock:
|
||||
self._cache.clear()
|
||||
|
||||
def __contains__(self, key):
|
||||
@ -69,9 +75,16 @@ class UserPermissions:
|
||||
# Role-permission cache: role_key -> list of paths
|
||||
self.rp_caches = None
|
||||
self.rp_cache_loaded_at = 0
|
||||
import time
|
||||
self._init_time = time.time()
|
||||
|
||||
# Lock for rp_caches initialization
|
||||
self._rp_lock = threading.Lock()
|
||||
# Async lock for rp_caches initialization (lazy init)
|
||||
self._rp_lock = None
|
||||
|
||||
def _get_rp_lock(self):
|
||||
if self._rp_lock is None:
|
||||
self._rp_lock = asyncio.Lock()
|
||||
return self._rp_lock
|
||||
|
||||
async def get_user_roles(self, userid):
|
||||
"""Get roles for a user, with LRU+TTL caching."""
|
||||
@ -103,10 +116,23 @@ class UserPermissions:
|
||||
self.rp_cache_loaded_at = 0
|
||||
|
||||
async def load_roleperms(self, sor):
|
||||
"""Load all role-permission mappings into cache."""
|
||||
"""Load all role-permission mappings into cache.
|
||||
|
||||
High-concurrency safe:
|
||||
- Uses asyncio.Lock to prevent multiple coroutines loading simultaneously
|
||||
- Double-check pattern: after acquiring lock, check if another coroutine already loaded
|
||||
- TTL ensures periodic refresh
|
||||
"""
|
||||
import time
|
||||
now = time.time()
|
||||
# Double-check with lock to prevent race conditions
|
||||
with self._rp_lock:
|
||||
|
||||
# Fast path: cache valid, no lock needed
|
||||
if self.rp_caches is not None and (now - self.rp_cache_loaded_at) < self.rp_cache_ttl:
|
||||
return
|
||||
|
||||
# Slow path: acquire lock and double-check
|
||||
async with self._get_rp_lock():
|
||||
# Double-check after lock acquisition
|
||||
if self.rp_caches is not None and (now - self.rp_cache_loaded_at) < self.rp_cache_ttl:
|
||||
return
|
||||
|
||||
@ -159,10 +185,10 @@ where a.id = c.userid
|
||||
async def is_user_has_path_perm(self, userid, path):
|
||||
"""Check if a user has permission for the given path.
|
||||
|
||||
Security improvements:
|
||||
1. rp_caches now has TTL to ensure permission changes take effect
|
||||
2. User role cache uses LRU+TTL to prevent unbounded growth
|
||||
3. Race condition protection with lock during rp_caches initialization
|
||||
High-concurrency safe:
|
||||
1. rp_caches TTL ensures permission changes take effect within 10 minutes
|
||||
2. Double-check locking prevents duplicate DB queries
|
||||
3. User role cache uses LRU+TTL to prevent unbounded growth
|
||||
"""
|
||||
roles = self.ur_caches.get(userid)
|
||||
if userid is None:
|
||||
|
||||
@ -43,12 +43,13 @@ async with get_sor_context(request._run_ns, 'rbac') as sor:
|
||||
if recs:
|
||||
if len(recs) == 1:
|
||||
r = recs[0]
|
||||
# Update last_login
|
||||
await sor.U('users', {'id': r.id}, {
|
||||
'last_login': curDateString('%Y-%m-%d %H:%M:%S'),
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None
|
||||
})
|
||||
# Update last_login atomically
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET last_login = NOW(), login_fail_count = 0,
|
||||
last_login_fail = NULL
|
||||
WHERE id = ${id}$
|
||||
""", {'id': r.id})
|
||||
await remember_user(r.id, username=r.username, userorgid=r.orgid)
|
||||
return {
|
||||
"status": "ok",
|
||||
@ -59,12 +60,12 @@ async with get_sor_context(request._run_ns, 'rbac') as sor:
|
||||
if params_kw.selected_id:
|
||||
for r in recs:
|
||||
if r.id == params_kw.selected_id:
|
||||
# Update last_login
|
||||
await sor.U('users', {'id': r.id}, {
|
||||
'last_login': curDateString('%Y-%m-%d %H:%M:%S'),
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None
|
||||
})
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET last_login = NOW(), login_fail_count = 0,
|
||||
last_login_fail = NULL
|
||||
WHERE id = ${id}$
|
||||
""", {'id': r.id})
|
||||
await remember_user(r.id, username=r.username, userorgid=r.orgid)
|
||||
return {
|
||||
"status": "ok",
|
||||
|
||||
@ -9,39 +9,43 @@ info(f'{ns=}')
|
||||
db = DBPools()
|
||||
dbname = get_module_dbname('rbac')
|
||||
async with db.sqlorContext(dbname) as sor:
|
||||
r = await sor.sqlExe('select * from users where username=${username}$', ns.copy())
|
||||
# Check lockout atomically in SQL
|
||||
r = await sor.sqlExe("""select * from users where username=${username}$
|
||||
and not (
|
||||
login_fail_count >= 3
|
||||
and last_login_fail is not null
|
||||
and last_login_fail > DATE_SUB(NOW(), INTERVAL 300 SECOND)
|
||||
)""", ns.copy())
|
||||
if len(r) == 0:
|
||||
return {
|
||||
"widgettype":"Error",
|
||||
"options":{
|
||||
"timeout":3,
|
||||
"title":"Login Error",
|
||||
"message":"user name or password error"
|
||||
}
|
||||
}
|
||||
user = r[0]
|
||||
|
||||
# Check login lockout
|
||||
fail_count = getattr(user, 'login_fail_count', 0) or 0
|
||||
last_fail = getattr(user, 'last_login_fail', None)
|
||||
if fail_count >= 3 and last_fail:
|
||||
# User not found or locked out
|
||||
r2 = await sor.sqlExe('select username from users where username=${username}$', ns.copy())
|
||||
if len(r2) == 0:
|
||||
msg = "user name or password error"
|
||||
else:
|
||||
msg = "Account locked due to too many failed login attempts. Please try again in 5 minutes."
|
||||
return {
|
||||
"widgettype":"Error",
|
||||
"options":{
|
||||
"timeout":5,
|
||||
"title":"Account Locked",
|
||||
"message":"Account locked due to too many failed login attempts. Please try again in 5 minutes."
|
||||
"title":"Login Error",
|
||||
"message": msg
|
||||
}
|
||||
}
|
||||
user = r[0]
|
||||
|
||||
# Verify password
|
||||
r = await sor.sqlExe('select * from users where username=${username}$ and password=${password}$', ns.copy())
|
||||
if len(r) == 0:
|
||||
# Increment fail count
|
||||
new_fail_count = fail_count + 1
|
||||
await sor.U('users', {'id': user.id}, {
|
||||
'login_fail_count': new_fail_count,
|
||||
'last_login_fail': curDateString('%Y-%m-%d %H:%M:%S')
|
||||
})
|
||||
# Atomically increment fail count
|
||||
now_str = curDateString('%Y-%m-%d %H:%M:%S')
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET login_fail_count = login_fail_count + 1,
|
||||
last_login_fail = ${now}$
|
||||
WHERE id = ${id}$
|
||||
""", {'id': user.id, 'now': now_str})
|
||||
|
||||
new_fail_count = (getattr(user, 'login_fail_count', 0) or 0) + 1
|
||||
if new_fail_count >= 3:
|
||||
msg = "Too many failed attempts. Account locked for 5 minutes."
|
||||
else:
|
||||
@ -54,12 +58,13 @@ async with db.sqlorContext(dbname) as sor:
|
||||
"message": msg
|
||||
}
|
||||
}
|
||||
# Success - reset fail count, update last_login
|
||||
await sor.U('users', {'id': user.id}, {
|
||||
'login_fail_count': 0,
|
||||
'last_login_fail': None,
|
||||
'last_login': curDateString('%Y-%m-%d %H:%M:%S')
|
||||
})
|
||||
# Success - atomically reset counters and update last_login
|
||||
await sor.sqlExe("""
|
||||
UPDATE users
|
||||
SET login_fail_count = 0, last_login_fail = NULL,
|
||||
last_login = NOW()
|
||||
WHERE id = ${id}$
|
||||
""", {'id': user.id})
|
||||
await remember_user(r[0].id, username=r[0].username, userorgid=r[0].orgid)
|
||||
return {
|
||||
"widgettype":"Message",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user