------------------------- -- Prep ------------------------- SET file_search_path = '/home/daifukusan/Documents/analysis/legislatives2024/res/'; CREATE OR REPLACE TEMP TABLE nuances AS SELECT *, CASE WHEN orientation = 'Extrême Gauche' THEN -2 WHEN orientation = 'Gauche' THEN -1 WHEN orientation = 'Centre' THEN 0 WHEN orientation = 'Droite' THEN 1 WHEN orientation = 'Extrême Droite' THEN 2 END AS 'orientation_num' FROM read_csv_auto('nuances.csv'); CREATE OR REPLACE TEMP TABLE raw AS SELECT regexp_extract(region, '\((.+)\)', 1) AS region_code, region, regexp_extract(circo, '\((.+)\)', 1) AS circo_code, circo, trim(results->'$[0]', '"') AS nuance, trim(results->'$[1]', '"') AS candidat, CAST(results->'$[2]' AS BIGINT) AS voix, CAST(results->'$[3]' AS DECIMAL) AS inscrits, CAST(results->'$[4]' AS DECIMAL) AS exprimes, trim(results->'$[5]', '"') AS qualif, FROM ( SELECT region, circo, unnest(results) AS results FROM read_json_auto('legislatives_2024_fr_results.json') ) AS t; CREATE OR REPLACE TEMP TABLE undecided AS WITH with_qualif AS ( SELECT DISTINCT region, circo FROM raw WHERE qualif = 'QUALIF T2' ) SELECT r.* FROM raw AS r JOIN with_qualif AS u ON u.region = r.region AND u.circo = r.circo; ------------------- -- Analyse ------------------- -- Configurations de second tours par ordre de voix par qualifiés WITH versus AS ( SELECT region, circo, string_agg(nuance, ', ' ORDER BY exprimes) AS vs FROM raw WHERE qualif = 'QUALIF T2' GROUP BY region, circo ORDER BY region, circo ) SELECT count(*) AS "nombre", vs AS "configuration" FROM versus GROUP BY vs ORDER BY "nombre" DESC; -- Nombre de 2n tours SELECT CASE WHEN qualif = 'OUI' THEN 'NON' ELSE 'OUI' END AS '2nd tour ?', count(*) as 'nombre' FROM (SELECT DISTINCT region, circo, qualif FROM raw WHERE qualif in ('QUALIF T2', 'OUI')) AS t GROUP BY qualif; -- Nombre de circonscriptions qui auront un 2nd tour dans lesquelles NFP est arrivé à telle ou telle position au premier tour: WITH ordered AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM raw WHERE qualif = 'QUALIF T2' ) SELECT pos AS "Position 1er tour", count(*) AS "Nombre de Circonscription" FROM ordered WHERE nuance = 'UG' GROUP BY pos ORDER BY pos; ---------------------- -- Projection ---------------------- CREATE OR REPLACE TEMP TABLE cleaned_undecided AS SELECT region_code, circo_code, region, circo, nuance, candidat, voix, inscrits, exprimes, CASE WHEN qualif = 'NON' THEN FALSE ELSE TRUE END AS qualified, code, nom, orientation, orientation_num, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos FROM undecided AS r JOIN nuances AS n ON r.nuance = n.code; CREATE OR REPLACE TEMP TABLE report_voix_duel AS SELECT * FROM read_csv_auto('report_voix_duel.csv'); CREATE OR REPLACE TEMP TABLE tour_2 AS WITH duels AS ( SELECT circo_code, list(orientation ORDER BY orientation) AS orientations FROM cleaned_undecided WHERE qualified = TRUE AND pos <= 2 GROUP BY circo_code ) SELECT cu.*, CASE WHEN orientation = 'Extrême Gauche' THEN 'Gauche' ELSE orientation END AS bucket_orientation, CASE WHEN orientation_num = -2 THEN -1 ELSE orientation_num END AS bucket_orientation_num, d.orientations[1] AS orientation_candidat_1, d.orientations[2] AS orientation_candidat_2 FROM cleaned_undecided AS cu JOIN duels AS d ON d.circo_code = cu.circo_code; CREATE OR REPLACE TEMP TABLE projection AS WITH report AS ( SELECT dc.region, dc.circo, rvd."to" AS orientation, dc.exprimes * rvd.quantity AS projection FROM tour_2 AS dc JOIN report_voix_duel AS rvd ON ( dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2 OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1 ) AND rvd."from" = dc.orientation ORDER BY circo_code ), agg_projection AS ( SELECT region, circo, orientation, SUM(projection) AS projection FROM report GROUP BY region, circo, orientation ORDER BY region, circo, orientation ) SELECT region, circo, orientation, projection / SUM(projection) OVER (PARTITION BY region, circo) AS projection FROM agg_projection CREATE OR REPLACE TEMP TABLE projection_voix AS SELECT dc.region, dc.circo, rvd."to" AS orientation, SUM(dc.voix * rvd.quantity) AS projection FROM tour_2 AS dc JOIN report_voix_duel AS rvd ON ( dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2 OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1 ) AND rvd."from" = dc.orientation GROUP BY dc.region, dc.circo, rvd."to" ----- -- Circonscriptions dans lesquelles il y a 2 candidats de même orientation SELECT region, circo, orientation, string_agg(nuance, ', ' ORDER BY exprimes DESC) AS nuances FROM cleaned_undecided WHERE qualified = TRUE GROUP BY region, circo, orientation HAVING count(*) >= 2 ORDER BY orientation, region, circo -- Duels en cas de désistement automatique WITH duels_circo AS ( SELECT DISTINCT circo_code, orientation_candidat_1, orientation_candidat_2 FROM tour_2 ) SELECT orientation_candidat_1, orientation_candidat_2, count(*) AS n FROM duels_circo GROUP BY orientation_candidat_1, orientation_candidat_2 ORDER BY n DESC; -- Après report de voix WITH ordered_projection AS ( SELECT *, ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY projection DESC) AS pos FROM projection ) SELECT orientation, count(*) AS n FROM ordered_projection WHERE pos = 1 GROUP BY orientation ORDER BY n DESC; -- Projection avec faible écart CREATE OR REPLACE TEMP TABLE difference AS WITH diff_null AS ( SELECT p.region, p.circo, FIRST(p.orientation) OVER (PARTITION BY p.region, p.circo ORDER BY p.projection DESC) AS elu, ABS(p.projection - lag(p.projection) OVER (PARTITION BY p.region, p.circo)) AS diff, ABS(pv.projection - lag(pv.projection) OVER (PARTITION BY p.region, p.circo)) AS diff_voix FROM projection AS p JOIN projection_voix AS pv ON p.region = pv.region AND p.circo = pv.circo AND p.orientation = pv.orientation ORDER BY p.region, p.circo ), duels_circo AS ( SELECT DISTINCT circo_code, region, circo, orientation_candidat_1, orientation_candidat_2 FROM tour_2 ) SELECT dc.*, d.elu, d.diff, d.diff_voix FROM duels_circo AS dc JOIN diff_null AS d ON d.region = dc.region AND d.circo = dc.circo WHERE d.diff IS NOT NULL; -- Nombre de duels avec un écart entre les candidats de 1, 2, 5, 10, 20 et 50 pourcents WITH bucket_diff_orientation AS ( SELECT orientation_candidat_1 || ' v ' || orientation_candidat_2 AS duel, elu AS "projection élu", SUM(CASE WHEN diff < 0.01 THEN 1 ELSE 0 END) AS "1%", SUM(CASE WHEN diff < 0.02 THEN 1 ELSE 0 END) AS "2%", SUM(CASE WHEN diff < 0.05 THEN 1 ELSE 0 END) AS "5%", SUM(CASE WHEN diff < 0.10 THEN 1 ELSE 0 END) AS "10%", SUM(CASE WHEN diff < 0.20 THEN 1 ELSE 0 END) AS "20%", SUM(CASE WHEN diff < 0.50 THEN 1 ELSE 0 END) AS "50%", COUNT(*) AS total FROM difference GROUP BY orientation_candidat_1, orientation_candidat_2, elu ), ordered_bucket_diff_orientation AS ( SELECT *, ROW_NUMBER() OVER (ORDER BY duel DESC, "projection élu" DESC) AS r FROM bucket_diff_orientation ), res AS ( SELECT * FROM ordered_bucket_diff_orientation UNION ALL SELECT 'TOTAL' AS duel, '' AS "projection élu", SUM("1%") AS "1%", SUM("2%") AS "3%", SUM("5%") AS "5%", SUM("10%") AS "10%", SUM("20%") AS "30%", SUM("50%") AS "50%", SUM(total) AS total, 0 AS r FROM ordered_bucket_diff_orientation ) SELECT * EXCLUDE (r) FROM res ORDER BY r DESC; -- Pour les ecarts de moins de 5%, à combien de voix cela se joue ? SELECT orientation_candidat_1 || ' v ' || orientation_candidat_2 AS duel, elu AS "projection élu", round(min(diff_voix)) AS "min", round(max(diff_voix)) AS "max", round(mean(diff_voix)) AS "moyenne", round(quantile_disc(diff_voix, 0.25)) AS "1er Quartile", round(median(diff_voix)) AS "médianne", round(quantile_disc(diff_voix, 0.75)) AS "3e Quartile", FROM difference WHERE diff < 0.05 GROUP BY orientation_candidat_1, orientation_candidat_2, elu ORDER BY duel, "projection élu" -- Les "swing" circos où la gauche est présente SELECT * FROM difference WHERE orientation_candidat_1 = 'Gauche' OR orientation_candidat_2 = 'Gauche' ORDER BY diff -- Rank des "swing" circos analysés par Averroès SELECT circo_code AS 'Code Circo', region AS 'Région', circo AS 'Circo', CASE WHEN diff < 0.01 THEN TRUE ELSE FALSE END AS '<1%', CASE WHEN diff < 0.02 THEN TRUE ELSE FALSE END AS '<2%', CASE WHEN diff < 0.05 THEN TRUE ELSE FALSE END AS '<5%', CASE WHEN elu = 'Gauche' THEN TRUE ELSE FALSE END AS 'Gauche favorite ?' FROM difference WHERE circo_code IN ( '0502', '0703', '1202', '1601', '2103', '2404', '3103', '3704', '3809', '6302', '6303', '6406', '6912', '7701', '7708', '8002', '8401', '8602', '8703', '9404', '9407', '0402' ) ORDER BY circo_code