349 lines
8.9 KiB
SQL
349 lines
8.9 KiB
SQL
-------------------------
|
|
-- Prep
|
|
-------------------------
|
|
SET file_search_path = '/home/daifukusan/Documents/analysis/legislatives2024/res/';
|
|
|
|
CREATE OR REPLACE TEMP TABLE nuances AS
|
|
SELECT
|
|
*,
|
|
CASE
|
|
WHEN orientation = 'Extrême Gauche' THEN -2
|
|
WHEN orientation = 'Gauche' THEN -1
|
|
WHEN orientation = 'Centre' THEN 0
|
|
WHEN orientation = 'Droite' THEN 1
|
|
WHEN orientation = 'Extrême Droite' THEN 2
|
|
END AS 'orientation_num'
|
|
FROM read_csv_auto('nuances.csv');
|
|
|
|
CREATE OR REPLACE TEMP TABLE raw AS
|
|
SELECT
|
|
regexp_extract(region, '\((.+)\)', 1) AS region_code,
|
|
region,
|
|
regexp_extract(circo, '\((.+)\)', 1) AS circo_code,
|
|
circo,
|
|
trim(results->'$[0]', '"') AS nuance,
|
|
trim(results->'$[1]', '"') AS candidat,
|
|
CAST(results->'$[2]' AS BIGINT) AS voix,
|
|
CAST(results->'$[3]' AS DECIMAL) AS inscrits,
|
|
CAST(results->'$[4]' AS DECIMAL) AS exprimes,
|
|
trim(results->'$[5]', '"') AS qualif,
|
|
FROM (
|
|
SELECT region, circo, unnest(results) AS results
|
|
FROM read_json_auto('legislatives_2024_fr_results.json')
|
|
) AS t;
|
|
|
|
|
|
CREATE OR REPLACE TEMP TABLE undecided AS
|
|
WITH with_qualif AS (
|
|
SELECT DISTINCT region, circo
|
|
FROM raw
|
|
WHERE qualif = 'QUALIF T2'
|
|
)
|
|
SELECT r.* FROM raw AS r
|
|
JOIN with_qualif AS u
|
|
ON u.region = r.region
|
|
AND u.circo = r.circo;
|
|
|
|
-------------------
|
|
-- Analyse
|
|
-------------------
|
|
|
|
-- Configurations de second tours par ordre de voix par qualifiés
|
|
WITH versus AS (
|
|
SELECT region, circo, string_agg(nuance, ', ' ORDER BY exprimes) AS vs
|
|
FROM raw
|
|
WHERE qualif = 'QUALIF T2'
|
|
GROUP BY region, circo
|
|
ORDER BY region, circo
|
|
)
|
|
SELECT
|
|
count(*) AS "nombre",
|
|
vs AS "configuration" FROM versus
|
|
GROUP BY vs
|
|
ORDER BY "nombre" DESC;
|
|
|
|
-- Nombre de 2n tours
|
|
SELECT
|
|
CASE WHEN qualif = 'OUI' THEN 'NON' ELSE 'OUI' END AS '2nd tour ?',
|
|
count(*) as 'nombre'
|
|
FROM (SELECT DISTINCT region, circo, qualif FROM raw WHERE qualif in ('QUALIF T2', 'OUI')) AS t
|
|
GROUP BY qualif;
|
|
|
|
|
|
-- Nombre de circonscriptions qui auront un 2nd tour dans lesquelles NFP est arrivé à telle ou telle position au premier tour:
|
|
WITH ordered AS (
|
|
SELECT
|
|
*,
|
|
ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos
|
|
FROM raw
|
|
WHERE qualif = 'QUALIF T2'
|
|
)
|
|
SELECT pos AS "Position 1er tour", count(*) AS "Nombre de Circonscription" FROM ordered
|
|
WHERE nuance = 'UG'
|
|
GROUP BY pos
|
|
ORDER BY pos;
|
|
|
|
----------------------
|
|
-- Projection
|
|
----------------------
|
|
|
|
CREATE OR REPLACE TEMP TABLE cleaned_undecided AS
|
|
SELECT
|
|
region_code,
|
|
circo_code,
|
|
region,
|
|
circo,
|
|
nuance,
|
|
candidat,
|
|
voix,
|
|
inscrits,
|
|
exprimes,
|
|
CASE WHEN qualif = 'NON' THEN FALSE ELSE TRUE END AS qualified,
|
|
code,
|
|
nom,
|
|
orientation,
|
|
orientation_num,
|
|
ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY exprimes DESC) AS pos
|
|
FROM undecided AS r
|
|
JOIN nuances AS n
|
|
ON r.nuance = n.code;
|
|
|
|
CREATE OR REPLACE TEMP TABLE report_voix_duel AS
|
|
SELECT
|
|
*
|
|
FROM read_csv_auto('report_voix_duel.csv');
|
|
|
|
|
|
CREATE OR REPLACE TEMP TABLE tour_2 AS
|
|
WITH duels AS (
|
|
SELECT circo_code, list(orientation ORDER BY orientation) AS orientations
|
|
FROM cleaned_undecided
|
|
WHERE qualified = TRUE
|
|
AND pos <= 2
|
|
GROUP BY circo_code
|
|
)
|
|
SELECT
|
|
cu.*,
|
|
CASE WHEN orientation = 'Extrême Gauche' THEN 'Gauche' ELSE orientation END AS bucket_orientation,
|
|
CASE WHEN orientation_num = -2 THEN -1 ELSE orientation_num END AS bucket_orientation_num,
|
|
d.orientations[1] AS orientation_candidat_1,
|
|
d.orientations[2] AS orientation_candidat_2
|
|
FROM cleaned_undecided AS cu
|
|
JOIN duels AS d
|
|
ON d.circo_code = cu.circo_code;
|
|
|
|
CREATE OR REPLACE TEMP TABLE projection AS
|
|
WITH report AS (
|
|
SELECT
|
|
dc.region,
|
|
dc.circo,
|
|
rvd."to" AS orientation,
|
|
dc.exprimes * rvd.quantity AS projection
|
|
FROM tour_2 AS dc
|
|
JOIN report_voix_duel AS rvd
|
|
ON (
|
|
dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2
|
|
OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1
|
|
) AND rvd."from" = dc.orientation
|
|
ORDER BY circo_code
|
|
), agg_projection AS (
|
|
SELECT region, circo, orientation, SUM(projection) AS projection
|
|
FROM report
|
|
GROUP BY region, circo, orientation
|
|
ORDER BY region, circo, orientation
|
|
)
|
|
SELECT
|
|
region,
|
|
circo,
|
|
orientation,
|
|
projection / SUM(projection) OVER (PARTITION BY region, circo) AS projection
|
|
FROM agg_projection
|
|
|
|
CREATE OR REPLACE TEMP TABLE projection_voix AS
|
|
SELECT
|
|
dc.region,
|
|
dc.circo,
|
|
rvd."to" AS orientation,
|
|
SUM(dc.voix * rvd.quantity) AS projection
|
|
FROM tour_2 AS dc
|
|
JOIN report_voix_duel AS rvd
|
|
ON (
|
|
dc.orientation_candidat_1 = rvd.candidat_1 AND dc.orientation_candidat_2 = rvd.candidat_2
|
|
OR dc.orientation_candidat_1 = rvd.candidat_2 AND dc.orientation_candidat_2 = rvd.candidat_1
|
|
) AND rvd."from" = dc.orientation
|
|
GROUP BY
|
|
dc.region,
|
|
dc.circo,
|
|
rvd."to"
|
|
|
|
-----
|
|
|
|
-- Circonscriptions dans lesquelles il y a 2 candidats de même orientation
|
|
SELECT region, circo, orientation, string_agg(nuance, ', ' ORDER BY exprimes DESC) AS nuances
|
|
FROM cleaned_undecided
|
|
WHERE qualified = TRUE
|
|
GROUP BY region, circo, orientation
|
|
HAVING count(*) >= 2
|
|
ORDER BY orientation, region, circo
|
|
|
|
-- Duels en cas de désistement automatique
|
|
WITH duels_circo AS (
|
|
SELECT DISTINCT circo_code, orientation_candidat_1, orientation_candidat_2
|
|
FROM tour_2
|
|
)
|
|
SELECT orientation_candidat_1, orientation_candidat_2, count(*) AS n
|
|
FROM duels_circo
|
|
GROUP BY orientation_candidat_1, orientation_candidat_2
|
|
ORDER BY n DESC;
|
|
|
|
-- Après report de voix
|
|
WITH ordered_projection AS (
|
|
SELECT
|
|
*,
|
|
ROW_NUMBER() OVER (PARTITION BY region, circo ORDER BY projection DESC) AS pos
|
|
FROM projection
|
|
)
|
|
SELECT orientation, count(*) AS n
|
|
FROM ordered_projection
|
|
WHERE pos = 1
|
|
GROUP BY orientation
|
|
ORDER BY n DESC;
|
|
|
|
|
|
-- Projection avec faible écart
|
|
CREATE OR REPLACE TEMP TABLE difference AS
|
|
WITH diff_null AS (
|
|
SELECT
|
|
p.region,
|
|
p.circo,
|
|
FIRST(p.orientation) OVER (PARTITION BY p.region, p.circo ORDER BY p.projection DESC) AS elu,
|
|
ABS(p.projection - lag(p.projection) OVER (PARTITION BY p.region, p.circo)) AS diff,
|
|
ABS(pv.projection - lag(pv.projection) OVER (PARTITION BY p.region, p.circo)) AS diff_voix
|
|
FROM projection AS p
|
|
JOIN projection_voix AS pv
|
|
ON p.region = pv.region
|
|
AND p.circo = pv.circo
|
|
AND p.orientation = pv.orientation
|
|
ORDER BY p.region, p.circo
|
|
), duels_circo AS (
|
|
SELECT DISTINCT circo_code, region, circo, orientation_candidat_1, orientation_candidat_2
|
|
FROM tour_2
|
|
)
|
|
SELECT dc.*, d.elu, d.diff, d.diff_voix
|
|
FROM duels_circo AS dc
|
|
JOIN diff_null AS d
|
|
ON d.region = dc.region
|
|
AND d.circo = dc.circo
|
|
WHERE d.diff IS NOT NULL;
|
|
|
|
|
|
-- Nombre de duels avec un écart entre les candidats de 1, 2, 5, 10, 20 et 50 pourcents
|
|
WITH bucket_diff_orientation AS (
|
|
SELECT
|
|
orientation_candidat_1 || ' v ' || orientation_candidat_2 AS duel,
|
|
elu AS "projection élu",
|
|
SUM(CASE WHEN diff < 0.01 THEN 1 ELSE 0 END) AS "1%",
|
|
SUM(CASE WHEN diff < 0.02 THEN 1 ELSE 0 END) AS "2%",
|
|
SUM(CASE WHEN diff < 0.05 THEN 1 ELSE 0 END) AS "5%",
|
|
SUM(CASE WHEN diff < 0.10 THEN 1 ELSE 0 END) AS "10%",
|
|
SUM(CASE WHEN diff < 0.20 THEN 1 ELSE 0 END) AS "20%",
|
|
SUM(CASE WHEN diff < 0.50 THEN 1 ELSE 0 END) AS "50%",
|
|
COUNT(*) AS total
|
|
FROM difference
|
|
GROUP BY
|
|
orientation_candidat_1,
|
|
orientation_candidat_2,
|
|
elu
|
|
), ordered_bucket_diff_orientation AS (
|
|
SELECT
|
|
*,
|
|
ROW_NUMBER() OVER (ORDER BY duel DESC, "projection élu" DESC) AS r
|
|
FROM bucket_diff_orientation
|
|
), res AS (
|
|
SELECT *
|
|
FROM ordered_bucket_diff_orientation
|
|
UNION ALL
|
|
SELECT
|
|
'TOTAL' AS duel,
|
|
'' AS "projection élu",
|
|
SUM("1%") AS "1%",
|
|
SUM("2%") AS "3%",
|
|
SUM("5%") AS "5%",
|
|
SUM("10%") AS "10%",
|
|
SUM("20%") AS "30%",
|
|
SUM("50%") AS "50%",
|
|
SUM(total) AS total,
|
|
0 AS r
|
|
FROM ordered_bucket_diff_orientation
|
|
)
|
|
SELECT * EXCLUDE (r)
|
|
FROM res
|
|
ORDER BY r DESC;
|
|
|
|
|
|
-- Pour les ecarts de moins de 5%, à combien de voix cela se joue ?
|
|
SELECT
|
|
orientation_candidat_1 || ' v ' || orientation_candidat_2 AS duel,
|
|
elu AS "projection élu",
|
|
round(min(diff_voix)) AS "min",
|
|
round(max(diff_voix)) AS "max",
|
|
round(mean(diff_voix)) AS "moyenne",
|
|
round(quantile_disc(diff_voix, 0.25)) AS "1er Quartile",
|
|
round(median(diff_voix)) AS "médianne",
|
|
round(quantile_disc(diff_voix, 0.75)) AS "3e Quartile",
|
|
FROM difference
|
|
WHERE diff < 0.05
|
|
GROUP BY
|
|
orientation_candidat_1,
|
|
orientation_candidat_2,
|
|
elu
|
|
ORDER BY
|
|
duel,
|
|
"projection élu"
|
|
|
|
|
|
-- Les "swing" circos où la gauche est présente
|
|
SELECT *
|
|
FROM difference
|
|
WHERE orientation_candidat_1 = 'Gauche'
|
|
OR orientation_candidat_2 = 'Gauche'
|
|
ORDER BY diff
|
|
|
|
-- Rank des "swing" circos analysés par Averroès
|
|
SELECT
|
|
circo_code AS 'Code Circo',
|
|
region AS 'Région',
|
|
circo AS 'Circo',
|
|
CASE WHEN diff < 0.01 THEN TRUE ELSE FALSE END AS '<1%',
|
|
CASE WHEN diff < 0.02 THEN TRUE ELSE FALSE END AS '<2%',
|
|
CASE WHEN diff < 0.05 THEN TRUE ELSE FALSE END AS '<5%',
|
|
CASE WHEN elu = 'Gauche' THEN TRUE ELSE FALSE END AS 'Gauche favorite ?'
|
|
FROM difference
|
|
WHERE circo_code IN (
|
|
'0502',
|
|
'0703',
|
|
'1202',
|
|
'1601',
|
|
'2103',
|
|
'2404',
|
|
'3103',
|
|
'3704',
|
|
'3809',
|
|
'6302',
|
|
'6303',
|
|
'6406',
|
|
'6912',
|
|
'7701',
|
|
'7708',
|
|
'8002',
|
|
'8401',
|
|
'8602',
|
|
'8703',
|
|
'9404',
|
|
'9407',
|
|
'0402'
|
|
)
|
|
ORDER BY circo_code
|
|
|
|
|