Source code for poli_sci_kit.utils

"""
Utilities
---------

Utility functions for general operations and plotting.

Contents:
    normalize,
    gen_list_of_lists,
    gen_faction_groups,
    gen_parl_points,
    swap_parl_allocations,
    hex_to_rgb,
    rgb_to_hex,
    scale_saturation
"""

import colorsys

import numpy as np
import pandas as pd
from colormath.color_objects import sRGBColor


def normalize(vals):
    """
    Returns respective normalized values.

    Parameters
    ----------
        vals : iterable of numbers
            The values to normalize. Any iterable is accepted, including
            one-shot iterators such as generators.

    Returns
    -------
        normalized_vals : list
            Each value divided by the sum of all values, in the original order.
            An empty input gives an empty list.

    Raises
    ------
        ZeroDivisionError
            If the input is non-empty and built-in numbers sum to zero.
    """
    # Materialize once: the input is traversed twice (sum, then division), and
    # a generator would otherwise be exhausted by sum() and yield no results.
    vals = list(vals)
    total_vals = sum(vals)

    return [1.0 * v / total_vals for v in vals]
def gen_list_of_lists(original_list, new_structure):
    """
    Generates a list of lists with a given structure from a given list.

    Parameters
    ----------
        original_list : list
            The flat data to be split into consecutive sublists.

        new_structure : list (contains ints)
            The length of each sublist to produce, in order.

    Returns
    -------
        list_of_lists : list of lists
            Consecutive slices of original_list whose lengths follow new_structure.

    Raises
    ------
        AssertionError
            If the lengths in new_structure do not sum to len(original_list).
    """
    assert len(original_list) == sum(
        new_structure
    ), "The number of elements in the original list and desired structure don't match."

    list_of_lists = []
    # Running start offset of the current sublist, so the prefix sum of
    # new_structure is not recomputed for every single element.
    start = 0
    for size in new_structure:
        list_of_lists.append([original_list[start + i] for i in range(size)])
        start += size

    return list_of_lists
def gen_faction_groups(original_list, factions_indexes):
    """
    Reorders a list into a list of lists where sublists are faction amounts.

    Parameters
    ----------
        original_list : list
            The data to be reorganized.

        factions_indexes : list of lists (contains ints)
            The structure of original_list indexes to output.

    Returns
    -------
        factioned_list : list of lists
            The values of original_list ordered as the indexes of factions_indexes.

    Raises
    ------
        IndexError
            If an index in factions_indexes is out of range for a list original_list.
    """
    # Each output sublist mirrors one faction's index sublist, so the values can
    # be looked up directly instead of flattening and regrouping them.
    return [
        [original_list[index] for index in faction] for faction in factions_indexes
    ]
def gen_parl_points(
    allocations, labels=None, style="semicircle", num_rows=2, speaker=False
):
    """
    Produces a df with coordinates for a parliament plot.

    Parameters
    ----------
        allocations : list
            The share of seats given to the regions or parties.

        labels : list : optional (default=None)
            The names of the groups. If not provided, names of the form
            'group_<index>' are generated.

        style : str (default=semicircle)
            Whether to plot the parliament as a semicircle or a rectangle.

        num_rows : int (default=2)
            The number of rows in the plot.

        speaker : bool or str : optional (default=False)
            Whether to include a point for the speaker of the house colored by their group.

            Note: 'True' colors the point based on the largest group, but passing a name from 'labels' is also possible.

    Returns
    -------
        df_seat_lctns : pd.DataFrame
            A dataframe with points to be converted to a parliament plot via seaborn's scatterplot.

            Columns are 'group', 'row', 'row_position', 'x_loc' and 'y_loc', plus
            'theta' for the semicircle style. The speaker, if requested, is the
            last row and only has 'group', 'x_loc' and 'y_loc' filled.

    Raises
    ------
        AssertionError
            If style is not one of the supported values, if speaker is neither
            True nor one of the labels, or if speaker is True and the largest
            allocation is tied.

        ValueError
            If the seats cannot be distributed over the requested number of rows.
    """
    assert style in [
        "semicircle",
        "rectangle",
    ], "Please choose one of semicircle or rectangle for the plotting style."

    total_seats = sum(allocations)

    if not labels:
        # For dataframe assignment.
        labels = [f"group_{i}" for i in range(len(allocations))]

    if speaker:
        # '== True' is deliberate: 'speaker' is either a boolean or a label.
        assert (speaker == True) or (  # noqa: E712
            speaker in labels
        ), "Either the 'speaker' argument must be true, or must match an element from the provided 'labels' argument."

        # The speaker takes one seat out of the body of the parliament.
        total_seats -= 1
        allocations = list(allocations)

        if speaker == True:  # noqa: E712
            assert (
                len([c for c in allocations if c == max(allocations)]) == 1
            ), "Two parties got the highest number of seats in the allocation. Please assign the speaker via passing one of their names."

            largest_group_index = allocations.index(max(allocations))
            allocations[largest_group_index] -= 1

            # Reassign 'speaker' to the largest group's name so it can be assigned later.
            speaker = labels[largest_group_index]

        elif speaker in labels:
            largest_group_index = labels.index(speaker)
            allocations[largest_group_index] -= 1

    # Make an empty dataframe and fill it with coordinates for the structure.
    # Then assign group values for allocation based on the rows.
    df_seat_lctns = pd.DataFrame(
        columns=["group", "row", "row_position", "x_loc", "y_loc"]
    )

    if style == "semicircle":

        def arc_coordinates(r, seats):
            """
            Generates an arc of the parliament plot given a radius and the number of seats.
            """
            angles = np.linspace(start=np.pi, stop=0, num=seats)

            # Broadcast angles to their corresponding coordinates.
            x_coordinates = list(r * np.cos(angles))
            y_coordinates = list(r * np.sin(angles))

            return x_coordinates, y_coordinates, list(angles)

        # Store point coordinates (x, y) and their angles with origin (0, 0).
        xs, ys, thetas = [], [], []

        # Create a list with radii values for each row.
        radii = range(2, 2 + num_rows)

        # Calculate the number of seats each row will have.
        row_seats = [int(total_seats / num_rows)] * num_rows
        # Seats left over when the total doesn't divide evenly into the rows;
        # all of them are added to the outermost row.
        extra_seat = total_seats - sum(row_seats)
        row_seats[-1] += extra_seat

        # Shift the seats per row such that it's always increasing.
        if num_rows % 2 != 0:
            seats_shift = list(range(-int(num_rows / 2), int(num_rows / 2) + 1))
        else:
            positive_shift = list(range(1, int(num_rows / 2) + 1))
            negative_shift = [-1 * i for i in positive_shift[::-1]]
            seats_shift = negative_shift + positive_shift

        # Greater shift for higher rows for more equal spacing.
        seats_shift = [i * int(num_rows / 2) for i in seats_shift]

        seats_per_row = [rs + seats_shift[i] for i, rs in enumerate(row_seats)]
        if any(seats <= 0 for seats in seats_per_row):
            raise ValueError(
                f"Cannot allocate {total_seats} seats into {num_rows} rows. Try a smaller number of rows."
            )

        row_indexes = []
        row_position_indexes = []
        for i, spr in enumerate(seats_per_row):
            arc_xs, arc_ys, arc_angles = arc_coordinates(radii[i], spr)
            xs += arc_xs
            ys += arc_ys
            thetas += arc_angles
            row_indexes += [i] * spr

            # List of lists for position indexes such that they can be accessed by row and position.
            row_position_indexes += [list(range(spr))]

        # Populate dataframe with coordinates, row number and position and angles.
        df_seat_lctns["x_loc"] = xs
        df_seat_lctns["y_loc"] = ys
        df_seat_lctns["theta"] = thetas
        df_seat_lctns["row"] = row_indexes
        df_seat_lctns["row_position"] = [
            item for sublist in row_position_indexes for item in sublist
        ]

        # Generate list of seat labels.
        seat_labels = []
        for n_seats, label in zip(allocations, labels):
            seat_labels.extend([label] * n_seats)

        # Sort plot points by their angle with the origin (0, 0).
        df_seat_lctns = df_seat_lctns.sort_values(
            by=["theta", "row"], ascending=[False, True]
        )

        # Assign seat labels.
        df_seat_lctns["group"] = seat_labels

    elif style == "rectangle":
        x_coordinate = 0

        # y_coordinates are split by baseline of 2 units, with double that for
        # the middle aisle.
        equa_distant_indexes = list(range(0, num_rows * 2, 2))
        y_coordinates = [
            i
            if (
                equa_distant_indexes.index(i) < int(len(equa_distant_indexes) / 2)
                and len(equa_distant_indexes) % 2 == 0
            )
            or (
                equa_distant_indexes.index(i)
                < int(len(equa_distant_indexes) / 2) + 1
                and len(equa_distant_indexes) % 2 != 0
            )
            else i + 2
            for i in equa_distant_indexes
        ]

        if num_rows == 1:
            for i in range(total_seats):
                df_seat_lctns.loc[i, "x_loc"] = x_coordinate
                df_seat_lctns.loc[i, "y_loc"] = 0
                # Record the position so single-row seats can be addressed by
                # (row, row_position) like seats of every other layout.
                df_seat_lctns.loc[i, "row_position"] = i

                x_coordinate += 2

            df_seat_lctns["row"] = [0] * len(df_seat_lctns)
            list_of_name_lists = [[labels[i]] * a for i, a in enumerate(allocations)]
            df_seat_lctns["group"] = [
                item for sublist in list_of_name_lists for item in sublist
            ]

        else:
            seats_per_full_row = int(total_seats / num_rows)
            if seats_per_full_row == 0:
                # Without at least one seat per row there are no coordinates to
                # extend; fail with a clear message instead of an empty max().
                raise ValueError(
                    f"Cannot allocate {total_seats} seats into {num_rows} rows. Try a smaller number of rows."
                )

            row_index = 0
            position_index = 0
            row_seats_no_remainder = seats_per_full_row * num_rows

            for i in range(row_seats_no_remainder):
                y_coordinate = y_coordinates[row_index]

                df_seat_lctns.loc[i, "row"] = row_index
                df_seat_lctns.loc[i, "row_position"] = position_index
                df_seat_lctns.loc[i, "x_loc"] = x_coordinate
                df_seat_lctns.loc[i, "y_loc"] = y_coordinate

                x_coordinate += 2
                position_index += 1

                # Reset to the start of the next row.
                if (i + 1) % seats_per_full_row == 0:
                    row_index += 1
                    x_coordinate = 0
                    position_index = 0

            # Add last seats that were rounded off.
            max_x = max(df_seat_lctns["x_loc"])
            # The overflow seat continues the position numbering of its row, so
            # this must come from 'row_position' and not from the x coordinate.
            max_pos = max(df_seat_lctns["row_position"])
            row_index = 0  # reset to first row
            for i in range(row_seats_no_remainder, total_seats):
                y_coordinate = y_coordinates[row_index]

                df_seat_lctns.loc[i, "row"] = row_index
                df_seat_lctns.loc[i, "row_position"] = max_pos + 1
                df_seat_lctns.loc[i, "x_loc"] = max_x + 2
                df_seat_lctns.loc[i, "y_loc"] = y_coordinate

                row_index += 1

            # Sort df for index based assignment.
            df_seat_lctns.sort_values(
                ["row", "x_loc", "y_loc"], ascending=[True, True, True], inplace=True
            )
            df_seat_lctns.reset_index(inplace=True, drop=True)

            # Define the top and bottom rows so they can be filled in order.
            top_rows = y_coordinates[int((len(y_coordinates) + 1) / 2) :]
            bottom_rows = y_coordinates[: int((len(y_coordinates) + 1) / 2)]

            # Find the total seats in each section to be depleted.
            total_top_seats = 0
            for row in top_rows:
                total_top_seats += len(df_seat_lctns[df_seat_lctns["y_loc"] == row])

            total_bottom_seats = 0
            for row in bottom_rows:
                total_bottom_seats += len(df_seat_lctns[df_seat_lctns["y_loc"] == row])

            # Index the group and deplete a copy of allocations at its location.
            # list() rather than .copy() so tuples and other sequences also work.
            group_index = 0
            seats_to_allocate = list(allocations)

            # Top assignment from low to high and left to right.
            top_x = 0
            top_y = top_rows[0]

            while total_top_seats > 0:
                index_to_assign = [
                    i
                    for i in df_seat_lctns.index
                    if df_seat_lctns.loc[i, "x_loc"] == top_x
                    and df_seat_lctns.loc[i, "y_loc"] == top_y
                ][0]
                df_seat_lctns.loc[index_to_assign, "group"] = labels[group_index]
                seats_to_allocate[group_index] -= 1

                if seats_to_allocate[group_index] == 0:
                    group_index += 1

                if top_y == top_rows[-1]:
                    # Move right and reset vertical.
                    top_x += 2
                    top_y = top_rows[0]
                else:
                    # Move up.
                    top_y += 2

                total_top_seats -= 1

            # Bottom assignment from high to low and right to left.
            bottom_x = max(df_seat_lctns["x_loc"])
            bottom_y = bottom_rows[-1]

            # Fix initial position in case of unequal seats per row.
            while not [
                i
                for i in df_seat_lctns.index
                if df_seat_lctns.loc[i, "x_loc"] == bottom_x
                and df_seat_lctns.loc[i, "y_loc"] == bottom_y
            ]:
                # Move down.
                bottom_y -= 2

            while total_bottom_seats > 0:
                index_to_assign = [
                    i
                    for i in df_seat_lctns.index
                    if df_seat_lctns.loc[i, "x_loc"] == bottom_x
                    and df_seat_lctns.loc[i, "y_loc"] == bottom_y
                ][0]
                df_seat_lctns.loc[index_to_assign, "group"] = labels[group_index]
                seats_to_allocate[group_index] -= 1

                if seats_to_allocate[group_index] == 0:
                    group_index += 1

                if bottom_y == bottom_rows[0]:
                    # Move left and reset vertical.
                    bottom_x -= 2
                    bottom_y = bottom_rows[-1]
                else:
                    # Move down.
                    bottom_y -= 2

                total_bottom_seats -= 1

    else:
        # Only reachable when assertions are disabled (python -O).
        raise ValueError(
            "The 'style' argument must be either 'semicircle' or 'rectangle'"
        )

    if speaker:
        # The speaker is appended as one extra row after all regular seats.
        index_to_assign = len(df_seat_lctns)
        if style == "semicircle":
            df_seat_lctns.loc[index_to_assign, "x_loc"] = 0
            df_seat_lctns.loc[index_to_assign, "y_loc"] = 0
            df_seat_lctns.loc[index_to_assign, "group"] = speaker

        elif style == "rectangle":
            if len(y_coordinates) % 2 == 0:
                # Even number of rows: place the speaker halfway across the aisle.
                middle_index_1 = len(y_coordinates) / 2 - 1
                middle_index_2 = len(y_coordinates) / 2
                y_coordinate = (
                    y_coordinates[int(middle_index_1)]
                    + y_coordinates[int(middle_index_2)]
                ) / 2
            else:
                # Odd number of rows: place the speaker 2 units above the middle row.
                middle_index = int(len(y_coordinates) / 2)
                y_coordinate = float(y_coordinates[middle_index] + 2)

            df_seat_lctns.loc[index_to_assign, "x_loc"] = 0
            df_seat_lctns.loc[index_to_assign, "y_loc"] = y_coordinate
            df_seat_lctns.loc[index_to_assign, "group"] = speaker

    return df_seat_lctns
def swap_parl_allocations(df, row_0, pos_0, row_1, pos_1):
    """
    Replaces two allocations of the parliament plot df to clean up coloration.

    The swap is applied to df in place, and df is also returned.

    Parameters
    ----------
        df : pandas.DataFrame
            DataFrame containing parliament data with 'row', 'row_position'
            and 'group' columns.

        row_0 : int
            The row of one seat to swap.

        pos_0 : int
            The position in the row of one seat to swap.

        row_1 : int
            The row of the other seat to swap.

        pos_1 : int
            The position in the row of the other seat to swap.

    Returns
    -------
        df_seat_lctns : pd.DataFrame
            A parliament plot allocations data frame with two allocations swapped

    Raises
    ------
        IndexError
            If no seat exists at one of the given (row, position) pairs.
    """
    # Build each seat's selector once and reuse it for reading and writing.
    seat_0 = (df["row"] == row_0) & (df["row_position"] == pos_0)
    seat_1 = (df["row"] == row_1) & (df["row_position"] == pos_1)

    # Read both groups before writing so neither value is overwritten early.
    allocation_0 = df.loc[seat_0, "group"].values[0]
    allocation_1 = df.loc[seat_1, "group"].values[0]

    df.loc[seat_0, "group"] = allocation_1
    df.loc[seat_1, "group"] = allocation_0

    return df
def hex_to_rgb(hex_rep):
    """
    Converts a hexadecimal representation to its RGB ratios.

    Parameters
    ----------
        hex_rep : str
            The hex representation of the color, with or without a leading '#'.
            Only the first six hex digits are read.

    Returns
    -------
        rgb_trip : sRGBColor
            A colormath sRGBColor built from the 0-255 channel values.

    Raises
    ------
        ValueError
            If one of the three two-character channel slices is not valid hex.
    """
    # Drop the optional '#' so that the channel slices line up either way.
    hex_digits = hex_rep[1:] if hex_rep.startswith("#") else hex_rep

    return sRGBColor(
        *[int(hex_digits[i : i + 2], 16) for i in (0, 2, 4)], is_upscaled=True
    )
def rgb_to_hex(rgb_trip):
    """
    Converts rgb ratios to their hexadecimal representation.

    Parameters
    ----------
        rgb_trip : tuple
            An RGB tuple color representation. If the first element is a float
            (built-in or NumPy), all three are treated as ratios and multiplied
            by 255; otherwise the values are used as given.

    Returns
    -------
        hex_rep : str
            The hex representation of the color.
    """
    trip_0, trip_1, trip_2 = rgb_trip[0], rgb_trip[1], rgb_trip[2]

    # np.floating covers every NumPy float width (float32 included), not only
    # float64; the decision is made on the first channel only.
    if isinstance(trip_0, (float, np.floating)):
        trip_0 *= 255
        trip_1 *= 255
        trip_2 *= 255

    # int() truncates fractional channel values rather than rounding them.
    return "#%02x%02x%02x" % (int(trip_0), int(trip_1), int(trip_2))
def scale_saturation(rgb_trip, sat):
    """
    Rescales an rgb color by a given factor.

    Note: the factor is applied to the HLS lightness of the color (capped at 1),
    while hue and saturation are passed through unchanged.

    Parameters
    ----------
        rgb_trip : tuple, str or sRGBColor
            The color to modify: an RGB tuple, a 7 character hex string or an
            sRGBColor. A 9 character string ending in '00' is returned as is.

        sat : float
            The factor that the lightness of rgb_trip is multiplied by.

    Returns
    -------
        saturated_rgb : tuple
            colorsys.hls_to_rgb output for the rescaled color, or the unchanged
            input string for a fully transparent RGBA value.
    """
    if isinstance(rgb_trip, str):
        if len(rgb_trip) == 9 and rgb_trip[-2:] == "00":
            # An RGBA has been provided and its alpha is 00, so return it for
            # a transparent marker.
            return rgb_trip

        if len(rgb_trip) == 7:
            rgb_trip = hex_to_rgb(rgb_trip)

    if isinstance(rgb_trip, sRGBColor):
        rgb_trip = rgb_trip.get_value_tuple()

    hue, lightness, saturation = colorsys.rgb_to_hls(*rgb_trip)
    scaled_lightness = min(1, lightness * sat)

    return colorsys.hls_to_rgb(hue, scaled_lightness, s=saturation)