------------------------- -- Prep ------------------------- SET file_search_path = '/home/daifukusan/Documents/analysis/legislatives2024/res/'; CREATE OR REPLACE TEMP TABLE nuances AS SELECT *, CASE WHEN orientation = 'Extrême Gauche' THEN -2 WHEN orientation = 'Gauche' THEN -1 WHEN orientation = 'Centre' THEN 0 WHEN orientation = 'Droite' THEN 1 WHEN orientation = 'Extrême Droite' THEN 2 END AS 'orientation_num' FROM read_csv_auto('nuances.csv'); CREATE OR REPLACE TEMP TABLE raw AS SELECT regexp_extract(region, '\((.+)\)', 1) AS region_code, region, regexp_extract(circo, '\((.+)\)', 1) AS circo_code, circo, trim(results->'$[0]', '"') AS nuance, trim(results->'$[1]', '"') AS candidat, CAST(results->'$[2]' AS BIGINT) AS voix, CAST(results->'$[3]' AS DECIMAL) AS inscrits, CAST(results->'$[4]' AS DECIMAL) AS exprimes, trim(results->'$[5]', '"') AS qualif, FROM ( SELECT region, circo, unnest(results) AS results FROM read_json_auto('legislatives_2024_fr_results.json') ) AS t; CREATE OR REPLACE TEMP TABLE undecided AS WITH with_qualif AS ( SELECT DISTINCT region, circo FROM raw WHERE qualif = 'QUALIF T2' ) SELECT r.* FROM raw AS r JOIN with_qualif AS u ON u.region = r.region AND u.circo = r.circo; ------------------- -- Analyse ------------------- -- Configurations de second tours par ordre de voix par qualifiés WITH versus AS ( SELECT region, circo, string_agg(nuance, ', ' ORDER BY exprimes) AS vs FROM raw WHERE qualif = 'QUALIF T2' GROUP BY region, circo ORDER BY region, circo ) SELECT count(*) AS "nombre", vs AS "configuration" FROM versus GROUP BY vs ORDER BY "nombre" DESC; -- Nombre de 2n tours SELECT CASE WHEN qualif = 'OUI' THEN 'NON' ELSE 'OUI' END AS '2nd tour ?', count(*) as 'nombre' FROM (SELECT DISTINCT region, circo, qualif FROM raw WHERE qualif in ('QUALIF T2', 'OUI')) AS t GROUP BY qualif; -- Nombre de circonscriptions qui auront un 2nd tour dans lesquelles NFP est arrivé à telle ou telle position au premier tour: WITH ordered AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM raw WHERE qualif = 'QUALIF T2' ) SELECT pos AS "Position 1er tour", count(*) AS "Nombre de Circonscription" FROM ordered WHERE nuance = 'UG' GROUP BY pos ORDER BY pos; ---------------------- -- Projection ---------------------- CREATE OR REPLACE TEMP TABLE cleaned_undecided AS SELECT region_code, circo_code, region, circo, nuance, candidat, voix, inscrits, exprimes, CASE WHEN qualif = 'NON' THEN FALSE ELSE TRUE END AS qualified, code, nom, orientation, orientation_num, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM undecided AS r JOIN nuances AS n ON r.nuance = n.code; CREATE OR REPLACE TEMP TABLE report_voix_duel AS SELECT * FROM read_csv_auto('report_voix_duel.csv'); CREATE OR REPLACE TEMP TABLE tour_2 AS WITH duels AS ( SELECT circo_code, list(orientation ORDER BY orientation) AS orientations FROM cleaned_undecided WHERE qualified = TRUE AND pos <= 2 GROUP BY circo_code ) SELECT cu.*, CASE WHEN orientation = 'Extrême Gauche' THEN 'Gauche' ELSE orientation END AS bucket_orientation, CASE WHEN orientation_num = -2 THEN -1 ELSE orientation_num END AS bucket_orientation_num, d.orientations[1] AS orientation_candidat_1, d.orientations[2] AS orientation_candidat_2 FROM cleaned_undecided AS cu JOIN duels AS d ON d.circo_code = cu.circo_code; ----- -- Circonscriptions dans lesquelles il y a 2 candidats de même orientation SELECT region, circo, orientation, string_agg(nuance, ', ' ORDER BY exprimes DESC) AS nuances FROM cleaned_undecided WHERE qualified = TRUE GROUP BY region, circo, orientation HAVING count(*) >= 2 ORDER BY orientation, region, circo -- Duels en cas de désistement automatique WITH duels_circo AS ( SELECT DISTINCT circo_code, orientation_candidat_1, orientation_candidat_2 FROM tour_2 ) SELECT orientation_candidat_1, orientation_candidat_2, count(*) AS n FROM duels_circo GROUP BY orientation_candidat_1, orientation_candidat_2 ORDER BY n DESC; -- Après report de voix WITH report AS ( SELECT dc.region, dc.circo, rvd."to" AS orientation, dc.exprimes * rvd.quantity AS projection FROM tour_2 AS dc JOIN report_voix_duel AS rvd ON ( dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2 OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1 ) AND rvd."from" = dc.orientation ORDER BY circo_code ), agg_projection AS ( SELECT region, circo, orientation, sum(projection) AS projection FROM report GROUP BY region, circo, orientation ORDER BY region, circo, orientation ), ordered_projection AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY projection DESC) AS pos FROM agg_projection ) SELECT orientation, count(*) AS n FROM ordered_projection WHERE pos = 1 GROUP BY orientation ORDER BY n DESC;