def plot_security_report(table: Table) -> io.BytesIO:
    """
    Generate the polar-plot security report without raising to the caller.

    Delegates the actual rendering to ``_plot_security_report`` and turns any
    failure into a logged error plus an empty buffer.

    Args:
        table (Table): The input data table containing security metrics.

    Returns:
        io.BytesIO: The buffer returned by ``_plot_security_report``, or an
        empty buffer if rendering raised an exception.
    """
    try:
        return _plot_security_report(table=table)
    except Exception as e:
        # ``Exception`` already covers TypeError, ValueError, OverflowError and
        # IndexError, so the catch-all is spelled once instead of in a tuple.
        logger.error(f"Error in generating the security report: {e}")
        return io.BytesIO()


def generate_identifiers(data: pd.DataFrame) -> list[str]:
    """
    Generate identifiers for the given dataset without raising to the caller.

    Delegates to ``_generate_identifiers`` and turns any failure into a logged
    error plus a placeholder result.

    Args:
        data (pd.DataFrame): A pandas DataFrame containing security-related data.

    Returns:
        list[str]: The identifiers returned by ``_generate_identifiers``, or a
        list holding a single empty string if generation raised an exception.
    """
    try:
        # The delegate's result has to be returned explicitly; a bare call
        # makes this function yield None on every successful run.
        return _generate_identifiers(data=data)
    except Exception as e:
        # NOTE(review): the fallback always has exactly one element, regardless
        # of len(data) -- confirm callers tolerate the length mismatch.
        logger.error(f"Error in generate_identifiers: {e}")
        return [""]
+ + Args: + table (Table): A table-like structure (e.g., pandas DataFrame) containing security report data + with columns for failure rate, tokens, and modules. + + Returns: + io.BytesIO: A buffer containing the generated plot image in PNG format. + """ # Data preprocessing + logger.info("Data preprocessing started.") + data = pd.DataFrame(table) # Sort by failure rate and reset index @@ -22,10 +76,10 @@ def plot_security_report(table: Table) -> io.BytesIO: fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={"projection": "polar"}) fig.set_facecolor("#f0f0f0") ax.set_facecolor("#f0f0f0") + logger.info("Plot setup complete.") # Styling parameters colors = ["#6C5B7B", "#C06C84", "#F67280", "#F8B195"][::-1] # Pastel palette - # colors = ["#440154", "#3b528b", "#21908c", "#5dc863"] # Viridis-inspired palette cmap = LinearSegmentedColormap.from_list("custom", colors, N=256) norm = Normalize(vmin=data["tokens"].min(), vmax=data["tokens"].max()) @@ -76,7 +130,10 @@ def plot_security_report(table: Table) -> io.BytesIO: # Title and caption fig.suptitle( - "Security Report for Different Modules", fontsize=16, fontweight="bold", y=1.02 + "Security Report for Different Modules", + fontsize=16, + fontweight="bold", + y=1.02, ) caption = "Report generated by https://github.com/msoedov/agentic_security" fig.text( @@ -114,17 +171,12 @@ def plot_security_report(table: Table) -> io.BytesIO: data["identifier"], data["failureRate"], data["module"] ) ] - table = ax.table( - cellText=table_data, - loc="right", - cellLoc="left", - ) + table = ax.table(cellText=table_data, loc="right", cellLoc="left") table.auto_set_font_size(False) table.set_fontsize(8) # Adjust table style table.scale(1, 0.7) - for (row, col), cell in table.get_celld().items(): cell.set_edgecolor("none") cell.set_facecolor("#f0f0f0" if row % 2 == 0 else "#e0e0e0") @@ -134,17 +186,33 @@ def plot_security_report(table: Table) -> io.BytesIO: cell.set_text_props(fontweight="bold") # Adjust layout and save - 
plt.tight_layout() buf = io.BytesIO() plt.savefig(buf, format="png", dpi=300, bbox_inches="tight") plt.close(fig) buf.seek(0) + logger.info("Report successfully generated and saved to buffer.") return buf -def generate_identifiers(data: pd.DataFrame) -> list[str]: +def _generate_identifiers(data: pd.DataFrame) -> list[str]: + """ + Generates a list of unique identifiers for each row in the given DataFrame. + + The identifiers are based on the English alphabet, with each identifier consisting + of a letter followed by a number. The letter represents the "group" of identifiers + (using a letter from A to Z) and the number is a counter within that group. For example: + - A1, A2, ..., A26, B1, B2, ..., Z1, Z2, ... + + Args: + data (pd.DataFrame): The input DataFrame containing data for which identifiers + are to be generated. + + Returns: + list[str]: A list of unique identifiers as strings, one for each row in the DataFrame. + """ data_length = len(data) + alphabet = string.ascii_uppercase num_letters = len(alphabet)