diff --git a/custom/.gitignore b/custom/.gitignore index 8e9508a9a89..97a003c841f 100644 --- a/custom/.gitignore +++ b/custom/.gitignore @@ -1,3 +1,4 @@ /* !/README.md !/.gitignore +!/__init__.py diff --git a/custom/__init__.py b/custom/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dev_utils/mongo_hooks.py b/dev_utils/mongo_hooks.py index 5dd8faede50..991d740b111 100644 --- a/dev_utils/mongo_hooks.py +++ b/dev_utils/mongo_hooks.py @@ -130,40 +130,43 @@ def denormalize_files_from_reports(reports): """Pull the file info from the FILES_COLL collection in to associated parts of the reports. """ - # Make sure we have a list whose objects we can modify in place instead of a mongo - # cursor as returned from mongo_find. - reports = list(reports) - file_dicts = [ - file_dict - for file_dict in itertools.chain.from_iterable(collect_file_dicts(report) for report in reports) - if FILE_REF_KEY in file_dict - ] - if not file_dicts: - # These are likely partial reports (like for an ajax request of a specific - # part of the report), had a projection applied that does not include any file - # information, or only the old-style of storing file information is present in - # these documents. - return reports - - file_refs = {file_dict[FILE_REF_KEY] for file_dict in file_dicts} - - file_docs = {} - batch_size = 50 - file_ref_iter = iter(file_refs) - while batch := tuple(itertools.islice(file_ref_iter, batch_size)): - # Reduce the size of the $in clause when there are large numbers of file refs by - # making multiple requests, passing batches of refs in. - for file_doc in mongo_find(FILES_COLL, {"_id": {"$in": batch}}, {TASK_IDS_KEY: 0}): - file_docs[file_doc.pop("_id")] = file_doc - - for file_dict in file_dicts: - if file_dict[FILE_REF_KEY] not in file_docs: - log.warning("Failed to find %s in %s collection.", FILES_COLL, file_dict[FILE_REF_KEY]) - continue - file_doc = file_docs[file_dict.pop(FILE_REF_KEY)] - file_dict.update(file_doc) - - return reports + def denormalize_generator(reports_iterable): + # Optimization: Ensure we have an iterator to avoid infinite loops on lists + reports_iter = iter(reports_iterable) + batch_size = 50 + while True: + # Grab a batch of reports from the cursor + reports_batch = list(itertools.islice(reports_iter, batch_size)) + if not reports_batch: + break + + file_dicts = [ + file_dict + for file_dict in itertools.chain.from_iterable(collect_file_dicts(report) for report in reports_batch) + if FILE_REF_KEY in file_dict + ] + + if file_dicts: + file_refs = {file_dict[FILE_REF_KEY] for file_dict in file_dicts} + file_docs = {} + file_ref_batch_size = 50 + file_ref_iter = iter(file_refs) + while batch := tuple(itertools.islice(file_ref_iter, file_ref_batch_size)): + # Reduce the size of the $in clause when there are large numbers of file refs by + # making multiple requests, passing batches of refs in. + for file_doc in mongo_find(FILES_COLL, {"_id": {"$in": batch}}, {TASK_IDS_KEY: 0}): + file_docs[file_doc.pop("_id")] = file_doc + + for file_dict in file_dicts: + if file_dict[FILE_REF_KEY] not in file_docs: + log.warning("Failed to find %s in %s collection.", FILES_COLL, file_dict[FILE_REF_KEY]) + continue + file_doc = file_docs[file_dict.pop(FILE_REF_KEY)] + file_dict.update(file_doc) + + yield from reports_batch + + return denormalize_generator(reports) @mongo_hook(mongo_find_one, "analysis") @@ -171,7 +174,8 @@ def denormalize_files(report): """Pull the file info from the FILES_COLL collection in to associated parts of the report. """ - denormalize_files_from_reports([report]) + # Consume the generator so the report is denormalized in-place + list(denormalize_files_from_reports([report])) return report diff --git a/dev_utils/mongodb.py b/dev_utils/mongodb.py index e156dc6ef4d..3dad5be7fc8 100644 --- a/dev_utils/mongodb.py +++ b/dev_utils/mongodb.py @@ -22,17 +22,25 @@ def connect_to_mongo() -> MongoClient: try: - return MongoClient( - host=repconf.mongodb.get("host", "127.0.0.1"), - port=repconf.mongodb.get("port", 27017), + host = repconf.mongodb.get("host", "127.0.0.1") + port = repconf.mongodb.get("port", 27017) + client = MongoClient( + host=host, + port=port, username=repconf.mongodb.get("username"), password=repconf.mongodb.get("password"), authSource=repconf.mongodb.get("authsource", "cuckoo"), tlsCAFile=repconf.mongodb.get("tlscafile", None), - connect=False, + connect=True, # Force connection now to catch issues + serverSelectionTimeoutMS=5000, + socketTimeoutMS=30000, ) - except (ConnectionFailure, ServerSelectionTimeoutError): - log.error("Cannot connect to MongoDB") + # Ping the server to ensure it's alive + client.admin.command('ping') + log.info("Successfully connected to MongoDB at %s:%s", host, port) + return client + except (ConnectionFailure, ServerSelectionTimeoutError) as e: + log.error("Cannot connect to MongoDB: %s", e) except Exception as e: log.warning("Unable to connect to MongoDB database: %s, %s", mdb, e) @@ -40,8 +48,27 @@ def connect_to_mongo() -> MongoClient: # q = results_db.analysis.find({"info.id": 26}, {"memory": 1}) # https://pymongo.readthedocs.io/en/stable/changelog.html - conn = connect_to_mongo() - results_db = conn[mdb] + _client = None + _results_db = None + + def get_mongodb(): + global _client, _results_db + if _client is None: + _client = connect_to_mongo() + _results_db = _client[mdb] + return _results_db + + # For legacy code that expects results_db to be an object + class LegacyDB: + @property + def analysis(self): return get_mongodb().analysis + @property + def calls(self): return get_mongodb().calls + @property + def files(self): return get_mongodb().files + def __getattr__(self, name): return getattr(get_mongodb(), name) + + results_db = LegacyDB() MAX_AUTO_RECONNECT_ATTEMPTS = 5 @@ -111,7 +138,7 @@ def mongo_insert_one(collection: str, doc): @graceful_auto_reconnect -def mongo_find(collection: str, query, projection=False, sort=None, limit=None): +def mongo_find(collection: str, query, projection=False, sort=None, limit=None, no_hooks=False): if sort is None: sort = [("_id", -1)] @@ -122,23 +149,30 @@ def mongo_find(collection: str, query, projection=False, sort=None, limit=None): find_by = functools.partial(find_by, limit=limit) result = find_by() - if result: + if result and not no_hooks: for hook in hooks[mongo_find][collection]: result = hook(result) return result @graceful_auto_reconnect -def mongo_find_one(collection: str, query, projection=False, sort=None): +def mongo_find_one(collection: str, query, projection=False, sort=None, max_time_ms=None, no_hooks=False): if sort is None: sort = [("_id", -1)] + + kwargs = {"sort": sort} + if max_time_ms: + kwargs["max_time_ms"] = max_time_ms + if projection: - result = getattr(results_db, collection).find_one(query, projection, sort=sort) + result = getattr(results_db, collection).find_one(query, projection, **kwargs) else: - result = getattr(results_db, collection).find_one(query, sort=sort) - if result: + result = getattr(results_db, collection).find_one(query, **kwargs) + + if result and not no_hooks: for hook in hooks[mongo_find_one][collection]: result = hook(result) + return result @@ -184,7 +218,7 @@ def mongo_find_one_and_update(collection, query, update, projection=None): @graceful_auto_reconnect def mongo_drop_database(database: str): - conn.drop_database(database) + get_mongodb().client.drop_database(database) def mongo_delete_data(task_ids: int | Sequence[int]) -> None: @@ -251,7 +285,7 @@ def mongo_delete_calls_by_task_id_in_range(*, range_start: int = 0, range_end: i def mongo_is_cluster(): # This is only useful at the moment for clean to prevent destruction of cluster database try: - conn.admin.command("listShards") + get_mongodb().client.admin.command("listShards") return True except OperationFailure: return False diff --git a/utils/gcp_pubsub_service.py b/utils/gcp_pubsub_service.py index 8fcb555ef82..b444c4dab61 100644 --- a/utils/gcp_pubsub_service.py +++ b/utils/gcp_pubsub_service.py @@ -7,8 +7,9 @@ import logging import os import sys -import tempfile +import shutil import threading +import warnings sys.path.append(os.path.join(os.path.abspath(os.path.dirname(__file__)), "..")) @@ -16,6 +17,7 @@ from lib.cuckoo.common.constants import CUCKOO_ROOT from lib.cuckoo.common.gcp import download_from_gcs from lib.cuckoo.common.path_utils import path_exists +from lib.cuckoo.common.utils import store_temp_file from lib.cuckoo.core.database import Database, init_database from lib.cuckoo.core.startup import check_user_permissions from utils.submit import submit_file @@ -25,6 +27,14 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s: %(message)s") log = logging.getLogger("gcp_pubsub_service") +warnings.filterwarnings( + "ignore", + message="You are using a non-supported Python version", + category=FutureWarning, + module="google\\.api_core", +) + + class GCPPubSubService: def __init__(self): self.gcp_cfg = Config("gcp") @@ -57,9 +67,9 @@ def __init__(self): self.db = Database() def process_message(self, message): + local_path = None try: payload = json.loads(message.data.decode("utf-8")) - log.info("Received payload: %s", payload.get("uuid")) sample_hash = payload.get("sample_hash") gcs_uri = payload.get("gcs_uri") @@ -90,17 +100,20 @@ def process_message(self, message): # Check if sample exists locally sample_hash = os.path.basename(sample_hash) local_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample_hash) - is_temp = False if not path_exists(local_path): log.info("Sample %s not found locally, fetching from GCS: %s", sample_hash, gcs_uri) - fd, temp_path = tempfile.mkstemp() - os.close(fd) + # Create a temporary path using store_temp_file with empty content + temp_path = store_temp_file(b"", sample_name) + if isinstance(temp_path, bytes): + temp_path = temp_path.decode() + if download_from_gcs(gcs_uri, temp_path): local_path = temp_path - is_temp = True else: log.error("Failed to download sample from GCS") + if os.path.exists(os.path.dirname(temp_path)): + shutil.rmtree(os.path.dirname(temp_path)) message.nack() return @@ -123,12 +136,6 @@ def process_message(self, message): except Exception as e: log.error("Failed to add task to database: %s", e) message.nack() - finally: - if is_temp and path_exists(local_path): - try: - os.unlink(local_path) - except Exception as e: - log.warning("Failed to delete temp file %s for task, %s: %s", local_path, payload.get("uuid"), e) except Exception as e: log.error("Error processing message: %s", e) diff --git a/web/analysis/views.py b/web/analysis/views.py index cda4d5afa9f..143716d9627 100644 --- a/web/analysis/views.py +++ b/web/analysis/views.py @@ -58,6 +58,15 @@ from lib.cuckoo.common.webadmin_utils import disable_user +# Support for custom on-demand services +try: + if settings.CUCKOO_PATH not in sys.path: + sys.path.append(settings.CUCKOO_PATH) + from custom.analysis_services import CUSTOM_SERVICES, handle_custom_service +except ImportError: + CUSTOM_SERVICES = [] + handle_custom_service = None + try: import re2 as re except ImportError: @@ -211,18 +220,13 @@ def _path_safe(path: str) -> bool: return True -def get_tags_tasks(task_ids: list) -> str: - for analysis in db.list_tasks(task_ids=task_ids): - return analysis.tags_tasks - - def get_task_package(task_id: int) -> str: task = db.view_task(task_id) task_dict = task.to_dict() return task_dict.get("package", "") -def get_analysis_info(db, id=-1, task=None): +def get_analysis_info(db, id=-1, task=None, rtmp=None): if not task: task = db.view_task(id) if not task: @@ -230,11 +234,14 @@ def get_analysis_info(db, id=-1, task=None): new = task.to_dict() if new["category"] in ("file", "pcap", "static") and new["sample_id"] is not None: - new["sample"] = db.view_sample(new["sample_id"]).to_dict() + if hasattr(task, "sample") and task.sample: + new["sample"] = task.sample.to_dict() + else: + new["sample"] = db.view_sample(new["sample_id"]).to_dict() filename = os.path.basename(new["target"]) new.update({"filename": filename}) - new.update({"user_task_tags": get_tags_tasks([new["id"]])}) + new["user_task_tags"] = task.tags_tasks if new.get("machine"): machine = new["machine"] @@ -242,9 +249,7 @@ def get_analysis_info(db, id=-1, task=None): machine = os.path.basename(machine) new.update({"machine": machine}) - rtmp = False - - if enabledconf["mongodb"]: + if not rtmp and enabledconf["mongodb"]: rtmp = mongo_find_one( "analysis", {"info.id": int(new["id"])}, @@ -355,10 +360,44 @@ def index(request, page=1): analyses_pcaps = [] analyses_static = [] - tasks_files = db.list_tasks(limit=TASK_LIMIT, offset=off, category="file", not_status=TASK_PENDING, tags_tasks_not_like="audit") - tasks_static = db.list_tasks(limit=TASK_LIMIT, offset=off, category="static", not_status=TASK_PENDING) - tasks_urls = db.list_tasks(limit=TASK_LIMIT, offset=off, category="url", not_status=TASK_PENDING) - tasks_pcaps = db.list_tasks(limit=TASK_LIMIT, offset=off, category="pcap", not_status=TASK_PENDING) + tasks_files = db.list_tasks( + limit=TASK_LIMIT, offset=off, category="file", not_status=TASK_PENDING, tags_tasks_not_like="audit", include_hashes=True + ) + tasks_static = db.list_tasks(limit=TASK_LIMIT, offset=off, category="static", not_status=TASK_PENDING, include_hashes=True) + tasks_urls = db.list_tasks(limit=TASK_LIMIT, offset=off, category="url", not_status=TASK_PENDING, include_hashes=True) + tasks_pcaps = db.list_tasks(limit=TASK_LIMIT, offset=off, category="pcap", not_status=TASK_PENDING, include_hashes=True) + + mongo_map = {} + if enabledconf["mongodb"]: + all_tasks = (tasks_files or []) + (tasks_static or []) + (tasks_urls or []) + (tasks_pcaps or []) + if all_tasks: + all_ids = [int(t.id) for t in all_tasks] + cursor = mongo_find( + "analysis", + {"info.id": {"$in": all_ids}}, + { + "info": 1, + "target.file.virustotal.summary": 1, + "url.virustotal.summary": 1, + "malscore": 1, + "detections": 1, + "network.pcap_sha256": 1, + "mlist_cnt": 1, + "f_mlist_cnt": 1, + "target.file.clamav": 1, + "suri_tls_cnt": 1, + "suri_alert_cnt": 1, + "suri_http_cnt": 1, + "suri_file_cnt": 1, + "trid": 1, + "_id": 0, + }, + sort=[("_id", -1)], + ) + for doc in cursor: + tid = doc.get("info", {}).get("id") + if tid and tid not in mongo_map: + mongo_map[tid] = doc # Vars to define when to show Next/Previous buttons paging = {} @@ -445,7 +484,7 @@ def index(request, page=1): if tasks_files: for task in tasks_files: - new = get_analysis_info(db, task=task) + new = get_analysis_info(db, task=task, rtmp=mongo_map.get(task.id)) if new["id"] == first_file: paging["show_file_next"] = "hide" if page <= 1: @@ -454,7 +493,7 @@ def index(request, page=1): # Added =: Fix page navigation for pages after the first page else: paging["show_file_prev"] = "show" - if db.view_errors(task.id): + if task.errors: new["errors"] = True analyses_files.append(new) @@ -463,13 +502,13 @@ def index(request, page=1): if tasks_static: for task in tasks_static: - new = get_analysis_info(db, task=task) + new = get_analysis_info(db, task=task, rtmp=mongo_map.get(task.id)) if new["id"] == first_static: paging["show_static_next"] = "hide" if page <= 1: paging["show_static_prev"] = "hide" - if db.view_errors(task.id): + if task.errors: new["errors"] = True analyses_static.append(new) @@ -478,13 +517,13 @@ def index(request, page=1): if tasks_urls: for task in tasks_urls: - new = get_analysis_info(db, task=task) + new = get_analysis_info(db, task=task, rtmp=mongo_map.get(task.id)) if new["id"] == first_url: paging["show_url_next"] = "hide" if page <= 1: paging["show_url_prev"] = "hide" - if db.view_errors(task.id): + if task.errors: new["errors"] = True analyses_urls.append(new) @@ -493,13 +532,13 @@ def index(request, page=1): if tasks_pcaps: for task in tasks_pcaps: - new = get_analysis_info(db, task=task) + new = get_analysis_info(db, task=task, rtmp=mongo_map.get(task.id)) if new["id"] == first_pcap: paging["show_pcap_next"] = "hide" if page <= 1: paging["show_pcap_prev"] = "hide" - if db.view_errors(task.id): + if task.errors: new["errors"] = True analyses_pcaps.append(new) @@ -531,24 +570,33 @@ def index(request, page=1): @conditional_login_required(login_required, settings.WEB_AUTHENTICATION) def pending(request): # db = Database() - tasks = db.list_tasks(status=TASK_PENDING) + tasks = db.list_tasks(status=TASK_PENDING, include_hashes=True) pending = [] for task in tasks: # Some tasks do not have sample attributes - sample = db.view_sample(task.sample_id) - if sample: + if task.sample: pending.append( { "id": task.id, "target": task.target, "added_on": task.added_on, "category": task.category, - "md5": sample.md5, - "sha256": sample.sha256, + "md5": task.sample.md5, + "sha256": task.sample.sha256, + } + ) + else: + pending.append( + { + "id": task.id, + "target": task.target, + "added_on": task.added_on, + "category": task.category, + "md5": "", + "sha256": "", } ) - data = {"tasks": pending, "count": len(pending), "title": "Pending Tasks"} return render(request, "analysis/pending.html", data) @@ -1398,6 +1446,8 @@ def filtered_chunk(request, task_id, pid, category, apilist, caller, tid): chunk = mongo_find_one("calls", {"_id": call}) if es_as_db: chunk = es.search(index=get_calls_index(), body={"query": {"match": {"_id": call}}})["hits"]["hits"][0]["_source"] + if not chunk: + continue for call in chunk.get("calls", []): # filter by call or tid if caller != "null" or tid != "0": @@ -1875,7 +1925,14 @@ def split_signature_calls(report): continue calls = [] non_calls = [] - for datum in sig.pop("data", []): + data_items = sig.pop("data", []) + # Optimization: Limit the number of data items processed per signature + # Many signatures have thousands of matches which can crash the web UI/uWSGI + if len(data_items) > 1000: + data_items = data_items[:1000] + sig["data_truncated"] = True + + for datum in data_items: if datum.get("type") == "call": calls.append(datum) else: @@ -1889,39 +1946,107 @@ def split_signature_calls(report): @require_safe @conditional_login_required(login_required, settings.WEB_AUTHENTICATION) -@ratelimit(key="ip", rate=my_rate_seconds, block=rateblock) -@ratelimit(key="ip", rate=my_rate_minutes, block=rateblock) def report(request, task_id): - network_report = False + network_report = {} report = {} if enabledconf["mongodb"]: + # Optimization: Fetch only essential metadata first. + # We can fetch more via AJAX or subsequent targeted queries if needed. + # Added 10s timeout to prevent worker hang + # Re-enabling hooks because we fixed the infinite loop in denormalize_files + # and we need the hook to populate 'target' hashes from the files collection. + projection = { + "info": 1, + "target": 1, + "signatures": 1, + "malscore": 1, + "malstatus": 1, + "detections": 1, + "trid": 1, + "virustotal": 1, + "virustotal_summary": 1, + "malware_conf": 1, + "CAPE.configs": 1, + "capa_summary": 1, + "curtain": 1, + "mitre_attck": 1, + "statistics": 1, + "shots": 1, + "debug": 1, + "behavior.summary": 1, + "network.domains": 1, + "network.dns": 1, + "network.hosts": 1, + "reversinglabs": 1, + "tcr_config_lookup": 1, + "_id": 0, + } + if CUSTOM_SERVICES: + for service in CUSTOM_SERVICES: + projection[service] = 1 + report = mongo_find_one( "analysis", {"info.id": int(task_id)}, - {"dropped": 0, "CAPE.payloads": 0, "procdump": 0, "procmemory": 0, "behavior.processes": 0, "network": 0, "memory": 0}, + projection, sort=[("_id", -1)], + max_time_ms=10000, + no_hooks=False, ) - network_report = mongo_find_one( + + # Lightweight existence check for tabs + # Bypass hooks here too + existence = mongo_find_one( "analysis", {"info.id": int(task_id)}, - {"network.domains": 1, "network.dns": 1, "network.hosts": 1}, - sort=[("_id", -1)], + {"sigma": 1, "sysmon": 1, "misp": 1, "classification": 1, "_id": 0}, + no_hooks=True, ) + if report and existence: + for field in ("sigma", "sysmon", "misp", "classification"): + if existence.get(field): + report[field] = True + + if report and "network" in report: + network_report = { + "network": { + "domains": report["network"].get("domains"), + "dns": report["network"].get("dns"), + "hosts": report["network"].get("hosts"), + } + } + report = split_signature_calls(report) if es_as_db: - query = es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id))["hits"]["hits"][0] - report = query["_source"] - # Extract out data for Admin tab in the analysis page - network_report = es.search( - index=get_analysis_index(), - query=get_query_by_info_id(task_id), - _source=["network.domains", "network.dns", "network.hosts"], - )["hits"]["hits"][0]["_source"] + try: + es_query = es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id)) + if es_query["hits"]["total"]["value"] > 0: + query_res = es_query["hits"]["hits"][0] + es_report = query_res["_source"] + + # Merge ES data into existing report (preserving custom fields from MongoDB) + if report: + for key, value in es_report.items(): + if key not in report or report[key] is None: + report[key] = value + else: + report = es_report + + # Extract out data for Admin tab in the analysis page + net_res = es.search( + index=get_analysis_index(), + query=get_query_by_info_id(task_id), + _source=["network.domains", "network.dns", "network.hosts"], + ) + if net_res["hits"]["total"]["value"] > 0: + network_report = net_res["hits"]["hits"][0]["_source"] - # Extract out data for Admin tab in the analysis page - esdata = {"index": query["_index"], "id": query["_id"]} - report["es"] = esdata + # Extract out data for Admin tab in the analysis page + esdata = {"index": query_res["_index"], "id": query_res["_id"]} + report["es"] = esdata + except Exception: + pass if not report: if DISABLED_WEB: msg = "You need to enable Mongodb/ES to be able to use WEBGUI to see the analysis" @@ -1930,6 +2055,21 @@ def report(request, task_id): return render(request, "error.html", {"error": msg}) + # Enforce TLP RED restrictions on the Web UI + # if report.get("info", {}).get("tlp", "").lower() == "red" and not request.user.is_staff: + # return render(request, "error.html", {"error": "Task has a TLP of RED and is restricted to staff."}) + + if report.get("info", {}).get("category", "") in ("file", "pcap", "static") and not report.get("target", {}).get( + "file", {} + ).get("sha256"): + return render( + request, + "error.html", + { + "error": "Report doesn't exist anymore! Or maybe just target data is missing, which means we don't have info about initial binary" + }, + ) + if isinstance(report.get("CAPE"), dict) and report.get("CAPE", {}).get("configs", {}): report["malware_conf"] = report["CAPE"]["configs"] report["CAPE"] = 0 @@ -1937,51 +2077,76 @@ def report(request, task_id): report["procdump"] = 0 report["memory"] = 0 - for key, value in (("dropped", "dropped"), ("procdump", "procdump"), ("CAPE.payloads", "CAPE"), ("procmemory", "procmemory")): - if enabledconf["mongodb"]: - try: - report[value] = list( - mongo_aggregate( - "analysis", - [ - {"$match": {"info.id": int(task_id)}}, - { - "$project": { - "_id": 0, - f"{value}_size": { - "$add": [ - {"$size": {"$ifNull": [f"${key}.{subkey}", []]}} for subkey in ("sha256", "file_ref") - ] - }, + if enabledconf["mongodb"]: + try: + # Optimization: Consolidate 4 aggregation calls into one to reduce DB round-trips + agg_results = list( + mongo_aggregate( + "analysis", + [ + {"$match": {"info.id": int(task_id)}}, + { + "$project": { + "_id": 0, + "dropped": { + "$add": [ + {"$size": {"$ifNull": ["$dropped.sha256", []]}}, + {"$size": {"$ifNull": ["$dropped.file_ref", []]}}, + ] }, - }, - ], - ) - )[0][f"{value}_size"] - except Exception: - report[value] = 0 - - elif es_as_db: - try: - report[value] = len( - es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id), _source=[f"{key}.sha256"])["hits"][ - "hits" - ][0]["_source"].get(key) + "procdump": { + "$add": [ + {"$size": {"$ifNull": ["$procdump.sha256", []]}}, + {"$size": {"$ifNull": ["$procdump.file_ref", []]}}, + ] + }, + "CAPE": { + "$add": [ + {"$size": {"$ifNull": ["$CAPE.payloads.sha256", []]}}, + {"$size": {"$ifNull": ["$CAPE.payloads.file_ref", []]}}, + ] + }, + "procmemory": { + "$add": [ + {"$size": {"$ifNull": ["$procmemory.sha256", []]}}, + {"$size": {"$ifNull": ["$procmemory.file_ref", []]}}, + ] + }, + } + }, + ], ) - except Exception as e: - print(e) + ) + if agg_results: + report.update(agg_results[0]) + except Exception: + for val in ("dropped", "procdump", "CAPE", "procmemory"): + report[val] = 0 + + elif es_as_db: + try: + es_res = es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id), _source=["dropped.sha256", "procdump.sha256", "CAPE.payloads.sha256", "procmemory.sha256"]) + if es_res["hits"]["total"]["value"] > 0: + source = es_res["hits"]["hits"][0]["_source"] + report["dropped"] = len(source.get("dropped") or []) + report["procdump"] = len(source.get("procdump") or []) + report["CAPE"] = len(source.get("CAPE", {}).get("payloads") or []) + report["procmemory"] = len(source.get("procmemory") or []) + except Exception: + pass try: if enabledconf["mongodb"]: - tmp_data = list(mongo_find("analysis", {"info.id": int(task_id), "memory": {"$exists": True}})) + # Optimization: Use mongo_find_one with projection to avoid loading massive documents just to check for field existence + tmp_data = mongo_find_one("analysis", {"info.id": int(task_id), "memory": {"$exists": True}}, {"_id": 1}) if tmp_data: - report["memory"] = tmp_data[0]["_id"] or 0 + report["memory"] = tmp_data["_id"] or 0 elif es_as_db: report["memory"] = len( es.search(index=get_analysis_index(), query=get_query_by_info_id(task_id), _source=["memory"])["hits"]["hits"] ) - except Exception as e: - print(e) + except Exception: + pass reports_exist = {} # check if we allow dl reports only to specific users @@ -2051,32 +2216,44 @@ def report(request, task_id): bingraph_dict_content = {} bingraph_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id), "bingraph") if path_exists(bingraph_path): - for file in os.listdir(bingraph_path): + # Optimization: Limit number of bingraphs to avoid memory/timeout issues + bingraph_files = os.listdir(bingraph_path)[:10] + for file in bingraph_files: tmp_file = os.path.join(bingraph_path, file) - bingraph_dict_content.setdefault(os.path.basename(tmp_file).split("-", 1)[0], Path(tmp_file).read_text()) + if path_exists(tmp_file) and _path_safe(tmp_file): + # Cap SVG size at 512KB + if path_get_size(tmp_file) < 512 * 1024: + bingraph_dict_content.setdefault(os.path.basename(tmp_file).split("-", 1)[0], Path(tmp_file).read_text()) domainlookups = {} iplookups = {} if network_report.get("network", {}): report["network"] = network_report["network"] - if "domains" in network_report["network"]: - domainlookups = dict((i["domain"], i["ip"]) for i in network_report["network"]["domains"]) - iplookups = dict((i["ip"], i["domain"]) for i in network_report["network"]["domains"]) - for i in network_report["network"]["dns"]: - for a in i["answers"]: + if "domains" in network_report["network"] and network_report["network"]["domains"]: + # Optimization: Cap lookups to prevent timeouts on massive reports + domains = network_report["network"]["domains"][:1000] + domainlookups = {i["domain"]: i["ip"] for i in domains} + iplookups = {i["ip"]: i["domain"] for i in domains} + + if "dns" in network_report["network"] and network_report["network"]["dns"]: + dns = network_report["network"]["dns"][:1000] + for i in dns: + for a in i.get("answers", []): iplookups[a["data"]] = i["request"] if HAVE_REQUEST and enabledconf["distributed"]: try: res = requests.get(f"http://127.0.0.1:9003/task/{task_id}", timeout=3, verify=False) if res and res.ok: - if "name" in res.json(): - report["distributed"] = {} - report["distributed"]["name"] = res.json()["name"] - report["distributed"]["task_id"] = res.json()["task_id"] - except Exception as e: - print(e) + res_data = res.json() + if "name" in res_data: + report["distributed"] = { + "name": res_data["name"], + "task_id": res_data["task_id"] + } + except Exception: + pass stats_total = { "total": 0, @@ -2093,21 +2270,16 @@ def report(request, task_id): stats_total[stats_category] = "{:.2f}".format(total) stats_total["total"] = "{:.2f}".format(stats_total["total"]) - if HAVE_REQUEST and enabledconf["distributed"]: - try: - res = requests.get(f"http://127.0.0.1:9003/task/{task_id}", timeout=3, verify=False) - if res and res.ok: - res = res.json() - if "name" in res: - report["distributed"] = {} - report["distributed"]["name"] = res["name"] - report["distributed"]["task_id"] = res["task_id"] - except Exception as e: - print(e) existent_tasks = {} if web_cfg.general.get("existent_tasks", False) and report.get("target", {}).get("file", {}).get("sha256"): - records = perform_search("sha256", report["target"]["file"]["sha256"]) + # Limit results and only fetch detections to avoid loading full reports + records = perform_search( + "sha256", + report["target"]["file"]["sha256"], + search_limit=10, + projection={"info.id": 1, "detections": 1, "_id": 0}, + ) for record in records: if record["info"]["id"] == report["info"]["id"]: continue @@ -2115,8 +2287,16 @@ def report(request, task_id): # process log per task if enabled: process_log_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(task_id), "process.log") - if web_cfg.general.expose_process_log and path_exists(process_log_path) and path_get_size(process_log_path): - report["process_log"] = path_read_file(process_log_path, mode="text") + if web_cfg.general.expose_process_log and path_exists(process_log_path): + log_size = path_get_size(process_log_path) + if log_size > 0: + # Limit to first 1MB to avoid memory/timeout issues + max_size = 1024 * 1024 + if log_size > max_size: + with open(process_log_path, "r") as f: + report["process_log"] = f.read(max_size) + "\n... [TRUNCATED - LOG TOO LARGE] ..." + else: + report["process_log"] = path_read_file(process_log_path, mode="text") return render( request, @@ -2604,6 +2784,7 @@ def filereport(request, task_id, category): "misp": "misp.json", "litereport": "lite.json", "cents": "cents.rules", + "parti": "report.parti", } if category in formats: @@ -2984,6 +3165,7 @@ def statistics_data(request, days=7): "xlsdeobf": processing_cfg, "strings": processing_cfg, "floss": integrations_cfg, + "virustotal": integrations_cfg, } @@ -3006,92 +3188,93 @@ def on_demand(request, service: str, task_id: str, category: str, sha256): # 4. reload page """ - if service not in ( - "bingraph", - "flare_capa", - "vba2graph", - "virustotal", - "xlsdeobf", - "strings", - "floss", - ) and not getattr( - on_demand_config_mapper.get(service, {}), service - ).get("on_demand"): - return render(request, "error.html", {"error": "Not supported/enabled service on demand"}) + if service in CUSTOM_SERVICES: + pass + elif service in on_demand_config_mapper: + config_section = getattr(on_demand_config_mapper[service], service, {}) + if not config_section.get("on_demand"): + return render(request, "error.html", {"error": f"{service} on demand is disabled in configuration"}) + else: + return render(request, "error.html", {"error": f"Unsupported service: {service}"}) # Restrict category to known report sections writable by this endpoint. allowed_categories = {"static", "CAPE", "procdump", "procmemory", "dropped"} if category not in allowed_categories: return render(request, "error.html", {"error": f"Unsupported category: {category}"}, status=400) - # Self Extracted support folder - path = os.path.join(CUCKOO_ROOT, "storage", "analyses", task_id, "selfextracted", sha256) - - if not path_exists(path): - extractedfile = False - if category == "static": - path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "binary") - category = "target.file" - elif category == "dropped": - path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "files", sha256) - else: - path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, category, sha256) + details = False + if service in CUSTOM_SERVICES and handle_custom_service: + details, category = handle_custom_service(service, task_id, sha256) else: - # selfextracted storage is shared by multiple categories; keep non-static category intact - if category == "static": - category = "target.file" - extractedfile = True - - if path and (not _path_safe(path) or not path_exists(path)): - return render(request, "error.html", {"error": "File not found: {}".format(path)}) + # Self Extracted support folder + path = os.path.join(CUCKOO_ROOT, "storage", "analyses", task_id, "selfextracted", sha256) - details = False - if service == "flare_capa" and HAVE_FLARE_CAPA: - # ToDo check if PE - details = flare_capa_details(path, category.lower(), on_demand=True) - if not details: - details = {"msg": "No results"} - - elif service == "vba2graph" and HAVE_VBA2GRAPH: - vba2graph_func(path, task_id, sha256, on_demand=True) - - elif service == "strings" and HAVE_STRINGS: - details = extract_strings(path, on_demand=True) - if not details: - details = {"strings": "No strings extracted"} - - elif service == "virustotal" and HAVE_VIRUSTOTAL: - details = vt_lookup("file", sha256, on_demand=True) - if not details: - details = {"msg": "No results"} - - elif service == "xlsdeobf" and HAVE_XLM_DEOBF: - details = xlmdeobfuscate(path, task_id, on_demand=True) - if not details: - details = {"msg": "No results"} - elif ( - service == "bingraph" - and HAVE_BINGRAPH - and reporting_cfg.bingraph.enabled - and reporting_cfg.bingraph.on_demand - and not path_exists(os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "bingraph", sha256 + "-ent.svg")) - ): - bingraph_path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "bingraph") - if not path_exists(bingraph_path): - path_mkdir(bingraph_path) - try: - bingraph_args_dict.update({"prefix": sha256, "files": [path], "save_dir": bingraph_path}) + if not path_exists(path): + extractedfile = False + if category == "static": + path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "binary") + category = "target.file" + elif category == "dropped": + path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "files", sha256) + else: + path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, category, sha256) + else: + # selfextracted storage is shared by multiple categories; keep non-static category intact + if category == "static": + category = "target.file" + extractedfile = True + + if path and (not _path_safe(path) or not path_exists(path)): + return render(request, "error.html", {"error": "File not found: {}".format(path)}) + + details = False + if service == "flare_capa" and HAVE_FLARE_CAPA: + # ToDo check if PE + details = flare_capa_details(path, category.lower(), on_demand=True) + if not details: + details = {"msg": "No results"} + + elif service == "vba2graph" and HAVE_VBA2GRAPH: + vba2graph_func(path, task_id, sha256, on_demand=True) + + elif service == "strings" and HAVE_STRINGS: + details = extract_strings(path, on_demand=True) + if not details: + details = {"strings": "No strings extracted"} + + elif service == "virustotal" and HAVE_VIRUSTOTAL: + details = vt_lookup("file", sha256, on_demand=True) + if not details: + details = {"msg": "No results"} + + elif service == "xlsdeobf" and HAVE_XLM_DEOBF: + details = xlmdeobfuscate(path, task_id, on_demand=True) + if not details: + details = {"msg": "No results"} + elif ( + service == "bingraph" + and HAVE_BINGRAPH + and reporting_cfg.bingraph.enabled + and reporting_cfg.bingraph.on_demand + and not path_exists(os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "bingraph", sha256 + "-ent.svg")) + ): + bingraph_path = os.path.join(ANALYSIS_BASE_PATH, "analyses", task_id, "bingraph") + if not path_exists(bingraph_path): + path_mkdir(bingraph_path) try: - bingraph_gen(bingraph_args_dict) + bingraph_args_dict.update({"prefix": sha256, "files": [path], "save_dir": bingraph_path}) + try: + bingraph_gen(bingraph_args_dict) + except Exception as e: + print("Can't generate bingraph for {}: {}".format(sha256, e)) except Exception as e: - print("Can't generate bingraph for {}: {}".format(sha256, e)) - except Exception as e: - print("Bingraph on demand error:", e) - elif service == "floss" and HAVE_FLOSS: - package = get_task_package(task_id) - details = Floss(path, package, on_demand=True).run() - if not details: - details = {"msg": "No results"} + print("Bingraph on demand error:", e) + + elif service == "floss" and HAVE_FLOSS: + package = get_task_package(task_id) + details = Floss(path, package, on_demand=True).run() + if not details: + details = {"msg": "No results"} def _set_service_by_sha256(node, target_sha256, service_name, service_details): if isinstance(node, dict): @@ -3108,16 +3291,19 @@ def _set_service_by_sha256(node, target_sha256, service_name, service_details): return True return False - if details: - buf = mongo_find_one("analysis", {"info.id": int(task_id)}, {"_id": 1, category: 1}) + if details is not False: + # Use no_hooks=True to avoid running heavy hooks just to get the _id for update + buf = mongo_find_one("analysis", {"info.id": int(task_id)}, {"_id": 1, category: 1}, no_hooks=True) + if not buf: + return render(request, "error.html", {"error": f"Task {task_id} not found in results database"}) servicedata = {} if category == "CAPE": - _set_service_by_sha256(buf[category].get("payloads", []) or [], sha256, service, details) - servicedata = buf[category] + _set_service_by_sha256(buf.get(category, {}).get("payloads", []) or [], sha256, service, details) + servicedata = buf.get(category) elif category in ("procdump", "procmemory", "dropped"): - _set_service_by_sha256(buf[category] or [], sha256, service, details) - servicedata = buf[category] + _set_service_by_sha256(buf.get(category) or [], sha256, service, details) + servicedata = buf.get(category) elif category == "target.file": servicedata = buf.get("target", {}).get("file", {}) if servicedata: @@ -3127,8 +3313,12 @@ def _set_service_by_sha256(node, target_sha256, service_name, service_details): _set_service_by_sha256(servicedata, sha256, service, details) else: servicedata.setdefault(service, details) + elif service in CUSTOM_SERVICES: + servicedata = details + category = service - if servicedata: + # Always try to save if details were found (even if empty) to mark the task as done + if servicedata is not None: try: mongo_update_one("analysis", {"_id": ObjectId(buf["_id"])}, {"$set": {category: servicedata}}) except MONGO_DOCUMENT_TOO_LARGE_ERRORS: @@ -3143,8 +3333,7 @@ def _set_service_by_sha256(node, target_sha256, service_name, service_details): }, status=413, ) - except Exception as e: - print(f"on_demand update failed for task_id={task_id} service={service} category={category} sha256={sha256}: {e}") + except Exception: return render( request, "error.html", @@ -3152,7 +3341,6 @@ def _set_service_by_sha256(node, target_sha256, service_name, service_details): status=500, ) del details - return redirect("report", task_id=task_id) diff --git a/web/apiv2/views.py b/web/apiv2/views.py index 51f8be446c0..4d8954bf3e2 100644 --- a/web/apiv2/views.py +++ b/web/apiv2/views.py @@ -31,6 +31,7 @@ from lib.cuckoo.common.constants import ANALYSIS_BASE_PATH, CUCKOO_ROOT, CUCKOO_VERSION from lib.cuckoo.common.exceptions import CuckooDemuxError from lib.cuckoo.common.path_utils import path_delete, path_exists +from lib.cuckoo.common.iocs import report_to_iocs, load_iocs from lib.cuckoo.common.saztopcap import saz_to_pcap from lib.cuckoo.common.utils import ( convert_to_printable, @@ -129,7 +130,6 @@ DIST_ENABLED = False if dist_conf.distributed.enabled: from lib.cuckoo.common.dist_db import create_session - from lib.cuckoo.common.dist_db import Task as DTask dist_session = create_session( dist_conf.distributed.db, @@ -255,7 +255,6 @@ def tasks_create_static(request): if callback: resp["url"] = ["{0}/submit/status/{1}/".format(apiconf.api.get("url"), task_ids[0])] else: - resp["data"] = {} resp["data"]["message"] = "Task IDs {0} have been submitted".format(", ".join(str(x) for x in task_ids)) if callback: resp["url"] = [] @@ -280,7 +279,7 @@ def tasks_create_file(request): if request.FILES.getlist("file") == []: resp = {"error": True, "error_value": "No file was submitted"} return Response(resp) - resp["error"] = False + resp["error"] = [] # Parse potential POST options (see submission/views.py) pcap = request.data.get("pcap", "") @@ -318,7 +317,6 @@ def tasks_create_file(request): "only_extraction": False, "user_id": request.user.id or 0, } - task_machines = [] vm_list = [vm.label for vm in db.list_machines()] @@ -418,7 +416,7 @@ def tasks_create_url(request): resp = {} if request.method == "POST": - resp["error"] = False + resp["error"] = [] url = request.data.get("url") ( @@ -519,7 +517,7 @@ def tasks_create_dlnexec(request): resp = {"error": True, "error_value": "DL&Exec Create API is Disabled"} return Response(resp) - resp["error"] = False + resp["error"] = [] url = request.data.get("dlnexec") if not url: resp = {"error": True, "error_value": "URL value is empty"} @@ -628,7 +626,7 @@ def files_view(request, md5=None, sha1=None, sha256=None, sample_id=None): resp = {} if md5 or sha1 or sha256 or sample_id: - resp["error"] = False + resp["error"] = [] """ for key, value in (("md5", md5), ("sha1", sha1), ("sha256", sha256), ("id", sample_id)): if value: @@ -679,7 +677,7 @@ def tasks_search(request, md5=None, sha1=None, sha256=None): return Response(resp) if md5 or sha1 or sha256: - resp["error"] = False + resp["error"] = [] if md5: if not apiconf.tasksearch.get("md5"): resp = {"error": True, "error_value": "Task Search by MD5 is Disabled"} @@ -706,11 +704,13 @@ def tasks_search(request, md5=None, sha1=None, sha256=None): sids = [sample.to_dict()["id"]] resp["data"] = [] for sid in sids: - tasks = db.list_tasks(sample_id=sid) + tasks = db.list_tasks(sample_id=sid, include_hashes=True) for task in tasks: buf = task.to_dict() # Remove path information, just grab the file name buf["target"] = buf["target"].rsplit("/", 1)[-1] + if task.sample: + buf["sample"] = task.sample.to_dict() resp["data"].append(buf) else: resp = {"data": [], "error": False} @@ -842,6 +842,7 @@ def tasks_list(request, offset=None, limit=None, window=None): status=status, options_like=option, order_by=Task.completed_on.desc(), + include_hashes=True, ) if not tasks: @@ -863,10 +864,8 @@ def tasks_list(request, offset=None, limit=None, window=None): task["errors"].append(error.message) task["sample"] = {} - if row.sample_id: - sample = db.view_sample(row.sample_id) - if sample: - task["sample"] = sample.to_dict() + if row.sample: + task["sample"] = row.sample.to_dict() if task.get("target"): task["target"] = convert_to_printable(task["target"]) @@ -910,7 +909,7 @@ def tasks_view(request, task_id): if m: task_id = int(m.group("taskid")) task = db.view_task(task_id, details=True) - resp["error"] = False + resp["error"] = [] if task: entry = task.to_dict() if entry["category"] != "url": @@ -1034,7 +1033,7 @@ def tasks_reschedule(request, task_id): resp = {} new_task_id = db.reschedule(task_id) if new_task_id: - resp["error"] = False + resp["error"] = [] resp["data"] = {} resp["data"]["new_task_id"] = new_task_id resp["data"]["message"] = "Task ID {0} has been rescheduled".format(task_id) @@ -1168,12 +1167,11 @@ def tasks_report(request, task_id, report_format="json", make_zip=False): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -1256,9 +1254,7 @@ def tasks_report(request, task_id, report_format="json", make_zip=False): if make_zip: if os.path.exists(report_path + ".zip"): report_path += ".zip" - resp = StreamingHttpResponse( - FileWrapper(open(report_path, "rb"), 8096), content_type="application/zip" - ) + resp = StreamingHttpResponse(FileWrapper(open(report_path, "rb"), 8096), content_type="application/zip") resp["Content-Length"] = os.path.getsize(report_path) resp["Content-Disposition"] = "attachment; filename=" + fname else: @@ -1342,11 +1338,24 @@ def tasks_iocs(request, task_id, detail=None): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) + data = load_iocs(task_id, detail) + if data is not None: + # security check + mti = data.get("info", {}).get("options", {}).get("main_task_id") + + if mti and task_id != mti: + resp = { + "error": True, + "error_value": f"Data doesn't match task id: {task_id} - main task id: {mti}, report to TCR for investigation.", + } + return Response(resp) + resp = {"error": False, "data": data} + return Response(resp) rtid = check.get("rtid", 0) if rtid: @@ -1373,196 +1382,15 @@ def tasks_iocs(request, task_id, detail=None): resp = {"error": True, "error_value": "Unable to retrieve report to parse for IOCs"} return Response(resp) - data = {} - if "tr_extractor" in buf: - data["tr_extractor"] = buf["tr_extractor"] - if "certs" in buf: - data["certs"] = buf["certs"] - data["detections"] = buf.get("detections") - data["malscore"] = buf["malscore"] - data["info"] = buf["info"] - del data["info"]["custom"] - # The machines key won't exist in cases where an x64 binary is submitted - # when there are no x64 machines. - if data.get("info", {}).get("machine", {}) and isinstance(data["info"]["machine"], dict): - del data["info"]["machine"]["manager"] - del data["info"]["machine"]["label"] - del data["info"]["machine"]["id"] - data["signatures"] = [] - """ - # Grab sigs - for sig in buf["signatures"]: - del sig["alert"] - data["signatures"].append(sig) - """ - # Grab target file info - if "target" in list(buf.keys()): - data["target"] = buf["target"] - if data["target"]["category"] == "file": - del data["target"]["file"]["path"] - del data["target"]["file"]["guest_paths"] - - data["network"] = {} - if "network" in list(buf.keys()) and buf["network"]: - data["network"]["traffic"] = {} - for netitem in ("tcp", "udp", "irc", "http", "dns", "smtp", "hosts", "domains"): - if netitem in buf["network"]: - data["network"]["traffic"][netitem + "_count"] = len(buf["network"][netitem]) - else: - data["network"]["traffic"][netitem + "_count"] = 0 - data["network"]["traffic"]["http"] = buf["network"]["http"] - data["network"]["hosts"] = buf["network"]["hosts"] - data["network"]["domains"] = buf["network"]["domains"] - data["network"]["ids"] = {} - if "suricata" in list(buf.keys()) and isinstance(buf["suricata"], dict): - data["network"]["ids"]["totalalerts"] = len(buf["suricata"]["alerts"]) - data["network"]["ids"]["alerts"] = buf["suricata"]["alerts"] - data["network"]["ids"]["http"] = buf["suricata"]["http"] - data["network"]["ids"]["totalfiles"] = len(buf["suricata"]["files"]) - data["network"]["ids"]["files"] = [] - for surifile in buf["suricata"]["files"]: - if "file_info" in list(surifile.keys()): - tmpfile = surifile - tmpfile["sha1"] = surifile["file_info"]["sha1"] - tmpfile["md5"] = surifile["file_info"]["md5"] - tmpfile["sha256"] = surifile["file_info"]["sha256"] - tmpfile["sha512"] = surifile["file_info"]["sha512"] - del tmpfile["file_info"] - data["network"]["ids"]["files"].append(tmpfile) - - data["static"] = {} - if "static" in list(buf.keys()): - pe = {} - pdf = {} - office = {} - if buf["static"].get("peid_signatures"): - pe["peid_signatures"] = buf["static"]["peid_signatures"] - if buf["static"].get("pe_timestamp"): - pe["pe_timestamp"] = buf["static"]["pe_timestamp"] - if buf["static"].get("pe_imphash"): - pe["pe_imphash"] = buf["static"]["pe_imphash"] - if buf["static"].get("pe_icon_hash"): - pe["pe_icon_hash"] = buf["static"]["pe_icon_hash"] - if buf["static"].get("pe_icon_fuzzy"): - pe["pe_icon_fuzzy"] = buf["static"]["pe_icon_fuzzy"] - if buf["static"].get("Objects"): - pdf["objects"] = len(buf["static"]["Objects"]) - if buf["static"].get("Info"): - if "PDF Header" in list(buf["static"]["Info"].keys()): - pdf["header"] = buf["static"]["Info"]["PDF Header"] - if "Streams" in buf["static"]: - if "/Page" in list(buf["static"]["Streams"].keys()): - pdf["pages"] = buf["static"]["Streams"]["/Page"] - if buf["static"].get("Macro"): - if "Analysis" in buf["static"]["Macro"]: - office["signatures"] = {} - for item in buf["static"]["Macro"]["Analysis"]: - office["signatures"][item] = [] - for indicator, desc in buf["static"]["Macro"]["Analysis"][item]: - office["signatures"][item].append((indicator, desc)) - if "Code" in buf["static"]["Macro"]: - office["macros"] = len(buf["static"]["Macro"]["Code"]) - data["static"]["pe"] = pe - data["static"]["pdf"] = pdf - data["static"]["office"] = office - - data["files"] = {} - data["files"]["modified"] = [] - data["files"]["deleted"] = [] - data["registry"] = {} - data["registry"]["modified"] = [] - data["registry"]["deleted"] = [] - data["mutexes"] = [] - data["executed_commands"] = [] - data["dropped"] = [] - - if "behavior" in buf and "summary" in buf["behavior"]: - if "write_files" in buf["behavior"]["summary"]: - data["files"]["modified"] = buf["behavior"]["summary"]["write_files"] - if "delete_files" in buf["behavior"]["summary"]: - data["files"]["deleted"] = buf["behavior"]["summary"]["delete_files"] - if "write_keys" in buf["behavior"]["summary"]: - data["registry"]["modified"] = buf["behavior"]["summary"]["write_keys"] - if "delete_keys" in buf["behavior"]["summary"]: - data["registry"]["deleted"] = buf["behavior"]["summary"]["delete_keys"] - if "mutexes" in buf["behavior"]["summary"]: - data["mutexes"] = buf["behavior"]["summary"]["mutexes"] - if "executed_commands" in buf["behavior"]["summary"]: - data["executed_commands"] = buf["behavior"]["summary"]["executed_commands"] - - data["process_tree"] = {} - if "behavior" in buf and "processtree" in buf["behavior"] and len(buf["behavior"]["processtree"]) > 0: - data["process_tree"] = { - "pid": buf["behavior"]["processtree"][0]["pid"], - "name": buf["behavior"]["processtree"][0]["name"], - "spawned_processes": [ - createProcessTreeNode(child_process) for child_process in buf["behavior"]["processtree"][0]["children"] - ], - } - if "dropped" in buf: - for entry in buf["dropped"]: - tmpdict = {} - if entry.get("clamav", False): - tmpdict["clamav"] = entry["clamav"] - if entry.get("sha256"): - tmpdict["sha256"] = entry["sha256"] - if entry.get("md5"): - tmpdict["md5"] = entry["md5"] - if entry.get("yara"): - tmpdict["yara"] = entry["yara"] - if entry.get("trid"): - tmpdict["trid"] = entry["trid"] - if entry.get("type"): - tmpdict["type"] = entry["type"] - if entry.get("guest_paths"): - tmpdict["guest_paths"] = entry["guest_paths"] - data["dropped"].append(tmpdict) - - if not detail: - resp = {"error": False, "data": data} - return Response(resp) - - if "static" in buf: - if buf["static"].get("pe_versioninfo"): - data["static"]["pe"]["pe_versioninfo"] = buf["static"]["pe_versioninfo"] - - if "behavior" in buf and "summary" in buf["behavior"]: - if "read_files" in buf["behavior"]["summary"]: - data["files"]["read"] = buf["behavior"]["summary"]["read_files"] - if "read_keys" in buf["behavior"]["summary"]: - data["registry"]["read"] = buf["behavior"]["summary"]["read_keys"] - if "resolved_apis" in buf["behavior"]["summary"]: - data["resolved_apis"] = buf["behavior"]["summary"]["resolved_apis"] - - if buf["network"] and "http" in buf["network"]: - data["network"]["http"] = {} - for req in buf["network"]["http"]: - if "host" in req: - data["network"]["http"]["host"] = req["host"] - else: - data["network"]["http"]["host"] = "" - if "data" in req and "\r\n" in req["data"]: - data["network"]["http"]["data"] = req["data"].split("\r\n", 1)[0] - else: - data["network"]["http"]["data"] = "" - if "method" in req: - data["network"]["http"]["method"] = req["method"] - else: - data["network"]["http"]["method"] = "" - if "user-agent" in req: - data["network"]["http"]["ua"] = req["user-agent"] - else: - data["network"]["http"]["ua"] = "" + # security check + mti = buf.get("info", {}).get("options", {}).get("main_task_id") - if "strings" in list(buf.keys()): - data["strings"] = buf["strings"] - else: - data["strings"] = ["No Strings"] + print("Task id", mti, task_id) + if mti and task_id != mti: + resp = {"error": True, "error_value": "Data if not matching task id, report to TCR for investigation."} + return Response(resp) - if "trid" in list(buf.keys()): - data["trid"] = buf["trid"] - else: - data["trid"] = ["None matched"] + data = report_to_iocs(buf, detail) resp = {"error": False, "data": data} return Response(resp) @@ -1576,12 +1404,11 @@ def tasks_screenshot(request, task_id, screenshot="all"): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -1629,12 +1456,11 @@ def tasks_pcap(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -1919,12 +1745,11 @@ def tasks_evtx(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -1952,7 +1777,7 @@ def tasks_mitmdump(request, task_id): return Response(resp) check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -1979,12 +1804,11 @@ def tasks_dropped(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2128,12 +1952,11 @@ def tasks_surifile(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2193,12 +2016,11 @@ def tasks_procmemory(request, task_id, pid="all"): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2271,12 +2093,11 @@ def tasks_fullmemory(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2318,7 +2139,7 @@ def file(request, stype, value): elif stype == "task": check = validate_task(value) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) sid = db.view_task(value).to_dict()["sample_id"] file_hash = db.view_sample(sid).to_dict()["sha256"] @@ -2378,7 +2199,7 @@ def machines_list(request): resp = {} resp["data"] = [] - resp["error"] = False + resp["error"] = [] machines = db.list_machines() for row in machines: resp["data"].append(row.to_dict()) @@ -2394,7 +2215,7 @@ def exit_nodes_list(request): resp = {} resp["data"] = [] - resp["error"] = False + resp["error"] = [] resp["data"] += ["socks:" + sock5 for sock5 in _load_socks5_operational() or []] resp["data"] += ["vpn:" + vpn for vpn in vpns.keys() or []] if routing_conf.tor.enabled: @@ -2416,7 +2237,7 @@ def machines_view(request, name=None): machine = db.view_machine(name=name) if machine: resp["data"] = machine.to_dict() - resp["error"] = False + resp["error"] = [] else: resp["error"] = True resp["error_value"] = "Machine not found" @@ -2438,7 +2259,7 @@ def cuckoo_status(request): resp["error"] = True resp["error_value"] = "Cuckoo Status API is disabled" else: - resp["error"] = False + resp["error"] = [] tasks_dict_with_counts = db.get_tasks_status_count() total_sum = 0 if isinstance(tasks_dict_with_counts, dict): @@ -2503,7 +2324,7 @@ def task_x_hours(request): @api_view(["GET"]) def tasks_latest(request, hours): resp = {} - resp["error"] = False + resp["error"] = [] timestamp = datetime.now() - timedelta(hours=int(hours)) ids = db.list_tasks(completed_after=timestamp) resp["ids"] = [id.to_dict() for id in ids] @@ -2519,12 +2340,11 @@ def tasks_payloadfiles(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2556,12 +2376,11 @@ def tasks_procdumpfiles(request, task_id): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2593,12 +2412,11 @@ def tasks_config(request, task_id, cape_name=False): check = validate_task(task_id) if check["error"]: - return Response(check) + return Response({"error": True, "error_value": "Task has a TLP of RED"}) if check.get("tlp", "") in ("red", "Red"): return Response({"error": True, "error_value": "Task has a TLP of RED"}) - rtid = check.get("rtid", 0) if rtid: task_id = rtid @@ -2712,7 +2530,7 @@ def tasks_download_services(request): hashes = request.POST.get("hashes").strip() if not hashes: return Response({"error": True, "error_value": "hashes value is empty"}) - resp["error"] = False + resp["error"] = [] # Parse potential POST options (see submission/views.py) options = request.POST.get("options", "") custom = request.POST.get("custom", "") @@ -2829,8 +2647,7 @@ def _stream_iterator(fp, guest_name, chunk_size=1024): resp = {"error": True, "error_value": "Filepath mustn't start with /"} return Response(resp) filepath = os.path.join(CUCKOO_ROOT, "storage", "analyses", f"{task_id}", filepath) - task_dir = os.path.join(ANALYSIS_BASE_PATH, "analyses", f"{task_id}") - if not os.path.normpath(filepath).startswith(task_dir + os.sep): + if not os.path.normpath(filepath).startswith(ANALYSIS_BASE_PATH): resp = {"error": True, "error_value": "Path traversal detected"} return Response(resp) if not os.path.isfile(filepath): @@ -2849,49 +2666,3 @@ def _stream_iterator(fp, guest_name, chunk_size=1024): log.exception(ex) resp = {"error": True, "error_value": f"Requests exception: {ex}"} return Response(resp) - - -@csrf_exempt -@api_view(["GET"]) -def dist_tasks_reported(request): - # List finished tasks here - if not DIST_ENABLED: - return Response( - { - "Error": True, - "error_value": "Distributed CAPE is not enabled", - } - ) - """ - - Add new API endpoint in CAPE to query the tasks that are reported and ready to be retrieved - Add new API endpoint in CAPE to set "task.notificated = True" for a specific task - - yeah we could script that go and fetch reported tasks. - can you currently list tasks that are finished but waiting to be retrieved in the api? - e.g. in the notification_loop() in dist.py, where it queries tasks that need to be sent to the callback url it does this: - - if there was an pi endpoint that exposed that, and another that allowed us to set notificated on the task when we'd finished processing it, then we wouldnt need the callback anymore - """ - # change to with session as - dist_db = dist_session() - ready = [] - tasks = dist_db.query(DTask).filter_by(finished=True, retrieved=True, notificated=False).order_by(DTask.id.desc()).all() - for task in tasks or []: - ready.append(task.main_task_id) - dist_db.close() - return Response({"Tasks": ready}) - - -@csrf_exempt -@api_view(["GET"]) -def dist_tasks_notification(request, task_id: int): - dist_db = dist_session() - tasks = dist_db.query(DTask).filter_by(main_task_id=task_id).order_by(DTask.id.desc()).all() - if not tasks: - return Response({"error": True, "error_value": f"No tasks found with main_task_id: {task_id}"}) - for task in tasks: - # main_db.set_status(task.main_task_id, TASK_REPORTED) - # log.debug("reporting main_task_id: {}".format(task.main_task_id)) - task.notificated = True - diff --git a/web/dashboard/views.py b/web/dashboard/views.py index b23c634658b..8d18cb2a0ca 100644 --- a/web/dashboard/views.py +++ b/web/dashboard/views.py @@ -13,19 +13,8 @@ sys.path.append(settings.CUCKOO_PATH) -from lib.cuckoo.common.web_utils import top_detections from lib.cuckoo.core.database import Database -from lib.cuckoo.core.data.task import ( - TASK_COMPLETED, - TASK_DISTRIBUTED, - TASK_FAILED_ANALYSIS, - TASK_FAILED_PROCESSING, - TASK_FAILED_REPORTING, - TASK_PENDING, - TASK_RECOVERED, - TASK_REPORTED, - TASK_RUNNING -) +from lib.cuckoo.core.data.task import TASK_COMPLETED, TASK_REPORTED # Conditional decorator for web authentication @@ -51,32 +40,17 @@ def format_number_with_space(number): def index(request): db: TasksMixIn = Database() + states_count = db.get_tasks_status_count() report = dict( total_samples=format_number_with_space(db.count_samples()), total_tasks=format_number_with_space(db.count_tasks()), - states_count={}, + states_count=states_count, estimate_hour=None, estimate_day=None, ) - states = ( - TASK_PENDING, - TASK_RUNNING, - TASK_DISTRIBUTED, - TASK_COMPLETED, - TASK_RECOVERED, - TASK_REPORTED, - TASK_FAILED_ANALYSIS, - TASK_FAILED_PROCESSING, - TASK_FAILED_REPORTING, - ) - - for state in states: - report["states_count"][state] = db.count_tasks(state) - # For the following stats we're only interested in completed tasks. - tasks = db.count_tasks(status=TASK_COMPLETED) - tasks += db.count_tasks(status=TASK_REPORTED) + tasks = states_count.get(TASK_COMPLETED, 0) + states_count.get(TASK_REPORTED, 0) data = {"title": "Dashboard", "report": {}} @@ -93,7 +67,7 @@ def index(request): report["estimate_hour"] = format_number_with_space(int(hourly)) report["estimate_day"] = format_number_with_space(int(24 * hourly)) - report["top_detections"] = top_detections() + # report["top_detections"] = top_detections() data["report"] = report return render(request, "dashboard/index.html", data) diff --git a/web/web/settings.py b/web/web/settings.py index 53858ef7cab..3c2c492855a 100644 --- a/web/web/settings.py +++ b/web/web/settings.py @@ -155,6 +155,7 @@ # in apps' "static/" subdirectories and in STATICFILES_DIRS. # Example: "/home/media/media.lawrence.com/static/" # When NGINX is as reverse proxy you need to put next line in local_settings.py +# python manage.py collectstatic STATIC_ROOT = "" @@ -338,7 +339,7 @@ SITE_ID = 1 -# https://docs.allauth.org/en/dev/socialaccount/configuration.html +# https://django-allauth.readthedocs.io/en/latest/configuration.html if web_cfg.registration.get("email_confirmation", False): ACCOUNT_EMAIL_VERIFICATION = "mandatory" SOCIALACCOUNT_EMAIL_VERIFICATION = ACCOUNT_EMAIL_VERIFICATION @@ -373,6 +374,7 @@ if web_cfg.registration.get("captcha_enabled", False): ACCOUNT_SIGNUP_FORM_CLASS = "web.allauth_forms.CaptchedSignUpForm" +# SOCIALACCOUNT_FORMS = {"signup": "web.allauth_forms.MyCustomSocialSignupForm"} # Fix to avoid migration warning in django 1.7 about test runner (1_6.W001). # In future it could be removed: https://code.djangoproject.com/ticket/23469