------------------------- -- Prep ------------------------- SET file_search_path = '/home/daifukusan/Documents/analysis/legislatives2024/res/'; CREATE OR REPLACE TEMP TABLE nuances AS SELECT *, CASE WHEN orientation = 'Extrême Gauche' THEN -2 WHEN orientation = 'Gauche' THEN -1 WHEN orientation = 'Centre' THEN 0 WHEN orientation = 'Droite' THEN 1 WHEN orientation = 'Extrême Droite' THEN 2 END AS 'orientation_num' FROM read_csv_auto('nuances.csv'); CREATE OR REPLACE TEMP TABLE raw AS SELECT regexp_extract(region, '\((.+)\)', 1) AS region_code, region, regexp_extract(circo, '\((.+)\)', 1) AS circo_code, circo, trim(results->'$[0]', '"') AS nuance, trim(results->'$[1]', '"') AS candidat, CAST(results->'$[2]' AS BIGINT) AS voix, CAST(results->'$[3]' AS DECIMAL) AS inscrits, CAST(results->'$[4]' AS DECIMAL) AS exprimes, trim(results->'$[5]', '"') AS qualif, FROM ( SELECT region, circo, unnest(results) AS results FROM read_json_auto('legislatives_2024_fr_results.json') ) AS t; CREATE OR REPLACE TEMP TABLE undecided AS WITH with_qualif AS ( SELECT DISTINCT region, circo FROM raw WHERE qualif = 'QUALIF T2' ) SELECT r.* FROM raw AS r JOIN with_qualif AS u ON u.region = r.region AND u.circo = r.circo; ------------------- -- Analyse ------------------- -- Configurations de second tours par ordre de voix par qualifiés WITH versus AS ( SELECT region, circo, string_agg(nuance, ', ' ORDER BY exprimes) AS vs FROM raw WHERE qualif = 'QUALIF T2' GROUP BY region, circo ORDER BY region, circo ) SELECT count(*) AS "nombre", vs AS "configuration" FROM versus GROUP BY vs ORDER BY "nombre" DESC; -- Nombre de 2n tours SELECT CASE WHEN qualif = 'OUI' THEN 'NON' ELSE 'OUI' END AS '2nd tour ?', count(*) as 'nombre' FROM (SELECT DISTINCT region, circo, qualif FROM raw WHERE qualif in ('QUALIF T2', 'OUI')) AS t GROUP BY qualif; -- Nombre de circonscriptions qui auront un 2nd tour dans lesquelles NFP est arrivé à telle ou telle position au premier tour: WITH ordered AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM raw WHERE qualif = 'QUALIF T2' ) SELECT pos AS "Position 1er tour", count(*) AS "Nombre de Circonscription" FROM ordered WHERE nuance = 'UG' GROUP BY pos ORDER BY pos; ---------------------- -- Projection ---------------------- CREATE OR REPLACE TEMP TABLE cleaned_undecided AS SELECT region_code, circo_code, region, circo, nuance, candidat, voix, inscrits, exprimes, CASE WHEN qualif = 'NON' THEN FALSE ELSE TRUE END AS qualified, code, nom, orientation, orientation_num, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM undecided AS r JOIN nuances AS n ON r.nuance = n.code; CREATE OR REPLACE TEMP TABLE report_voix_duel AS SELECT * FROM read_csv_auto('report_voix_duel.csv'); CREATE OR REPLACE TEMP TABLE tour_2 AS WITH duels AS ( SELECT circo_code, list(orientation ORDER BY orientation) AS orientations FROM cleaned_undecided WHERE qualified = TRUE AND pos <= 2 GROUP BY circo_code ) SELECT cu.*, CASE WHEN orientation = 'Extrême Gauche' THEN 'Gauche' ELSE orientation END AS bucket_orientation, CASE WHEN orientation_num = -2 THEN -1 ELSE orientation_num END AS bucket_orientation_num, d.orientations[1] AS orientation_candidat_1, d.orientations[2] AS orientation_candidat_2 FROM cleaned_undecided AS cu JOIN duels AS d ON d.circo_code = cu.circo_code; CREATE OR REPLACE TEMP TABLE projection AS WITH report AS ( SELECT dc.region, dc.circo, rvd."to" AS orientation, dc.exprimes * rvd.quantity AS projection FROM tour_2 AS dc JOIN report_voix_duel AS rvd ON ( dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2 OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1 ) AND rvd."from" = dc.orientation ORDER BY circo_code ), agg_projection AS ( SELECT region, circo, orientation, SUM(projection) AS projection FROM report GROUP BY region, circo, orientation ORDER BY region, circo, orientation ) SELECT region, circo, orientation, projection / SUM(projection) OVER (PARTITION BY region, circo) AS projection FROM agg_projection ----- -- Circonscriptions dans lesquelles il y a 2 candidats de même orientation SELECT region, circo, orientation, string_agg(nuance, ', ' ORDER BY exprimes DESC) AS nuances FROM cleaned_undecided WHERE qualified = TRUE GROUP BY region, circo, orientation HAVING count(*) >= 2 ORDER BY orientation, region, circo -- Duels en cas de désistement automatique WITH duels_circo AS ( SELECT DISTINCT circo_code, orientation_candidat_1, orientation_candidat_2 FROM tour_2 ) SELECT orientation_candidat_1, orientation_candidat_2, count(*) AS n FROM duels_circo GROUP BY orientation_candidat_1, orientation_candidat_2 ORDER BY n DESC; -- Après report de voix WITH ordered_projection AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY projection DESC) AS pos FROM projection ) SELECT orientation, count(*) AS n FROM ordered_projection WHERE pos = 1 GROUP BY orientation ORDER BY n DESC; -- Projection avec faible écart CREATE OR REPLACE TEMP TABLE difference AS WITH diff_null AS ( SELECT region, circo, ABS(projection - lag(projection) OVER (PARTITION BY region, circo)) AS diff FROM projection ORDER BY region, circo ) SELECT * FROM diff_null WHERE diff IS NOT NULL; WITH duels_circo AS ( SELECT DISTINCT circo_code, region, circo, orientation_candidat_1, orientation_candidat_2 FROM tour_2 ), diff_null AS ( SELECT region, circo, ABS(projection - lag(projection) OVER (PARTITION BY region, circo)) AS diff, FIRST(orientation) OVER (PARTITION BY region, circo ORDER BY projection DESC) AS elu FROM projection ORDER BY region, circo ), diff AS ( SELECT * FROM diff_null WHERE diff IS NOT NULL ), diff_orientation AS ( SELECT dc.*, d.diff, d.elu FROM duels_circo AS dc JOIN diff AS d ON d.region = dc.region AND d.circo = dc.circo ), bucket_diff_orientation AS ( SELECT orientation_candidat_1 || ' v ' || orientation_candidat_2 AS duel, elu AS "projection élu", SUM(CASE WHEN diff < 0.01 THEN 1 ELSE 0 END) AS "1%", SUM(CASE WHEN diff < 0.02 THEN 1 ELSE 0 END) AS "2%", SUM(CASE WHEN diff < 0.05 THEN 1 ELSE 0 END) AS "5%", SUM(CASE WHEN diff < 0.10 THEN 1 ELSE 0 END) AS "10%", SUM(CASE WHEN diff < 0.20 THEN 1 ELSE 0 END) AS "20%", SUM(CASE WHEN diff < 0.50 THEN 1 ELSE 0 END) AS "50%", COUNT(*) AS total FROM diff_orientation GROUP BY orientation_candidat_1, orientation_candidat_2, elu ), ordered_bucket_diff_orientation AS ( SELECT *, ROW_NUMBER() OVER (ORDER BY duel ASC, "projection élu" ASC) AS r FROM bucket_diff_orientation ), res AS ( SELECT * FROM ordered_bucket_diff_orientation UNION ALL SELECT 'TOTAL' AS duel, '' AS "projection élu", SUM("1%") AS "1%", SUM("2%") AS "3%", SUM("5%") AS "5%", SUM("10%") AS "10%", SUM("20%") AS "30%", SUM("50%") AS "50%", SUM(total) AS total, 0 AS r FROM ordered_bucket_diff_orientation ) SELECT * EXCLUDE (r) FROM res ORDER BY r DESC