[{"data":1,"prerenderedAt":529},["ShallowReactive",2],{"index":3,"/projects/motorsport-pipeline":38,"/projects/motorsport-pipeline-surround":514},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":5,"title":7,"description":8,"informations":9,"_id":32,"_type":33,"_source":34,"_file":35,"_stem":36,"_extension":37},"/","",false,"Pierre Graef - Data Engineer","Pierre Graef — Data Engineer specializing in ETL pipelines, cloud integration, and scalable data architectures. Proficient in Python, SQL, Spark, and Airflow.",{"name":10,"job":11,"availability":12,"email":13,"link":14,"profile":18,"skills":20,"description":21,"experience":22},"Pierre Graef","Data Engineer",true,"graef.pierre@gmail.com",{"x":5,"linkedin":15,"instagram":5,"github":16,"kaggle":17},"https://www.linkedin.com/in/pierre-graef","https://github.com/PierreGR7","https://www.kaggle.com/pierregraef",{"url":19},"/img/profile.jpg","Python | SQL | Spark | Airflow","I am a Data Engineer specializing in data architecture, ETL pipelines, and cloud integration. Proficient in Python, SQL, Spark, Airflow, I design scalable data systems, optimize workflows, and build insightful dashboards with Power BI. With international experience, I transform complex datasets into actionable business solutions.",[23,27],{"company":24,"role":11,"period":25,"location":26},"[CeA]","2024 – Present","Strasbourg, France",{"company":28,"role":29,"period":30,"location":31},"[Munich Re]","Intern Data Analyst",2024,"[Munich, Germany]","content:index.yml","yaml","content","index.yml","index","yml",{"_path":39,"_dir":40,"_draft":6,"_partial":6,"_locale":5,"title":41,"description":42,"authors":43,"image":47,"date":49,"body":50,"_type":509,"_id":510,"_source":34,"_file":511,"_stem":512,"_extension":513},"/projects/motorsport-pipeline","projects","Motorsport Data Pipeline (ETL + Dashboard)","Web scraping, SQLite, and Streamlit for multi-championship motorsport data",[44],{"name":10,"to":5,"avatar":45},{"src":46},"https://i.pravatar.cc/128?u=2",{"src":48},"/img/article_motorsport/cover.png","2026-03-26T00:00:00.000Z",{"type":51,"children":52,"toc":500},"root",[53,69,74,81,123,129,138,144,157,269,282,288,293,340,345,351,356,374,380,472,478,494],{"type":54,"tag":55,"props":56,"children":57},"element","p",{},[58,61,67],{"type":59,"value":60},"text","An end-to-end data engineering project collecting structured data on ",{"type":54,"tag":62,"props":63,"children":64},"strong",{},[65],{"type":59,"value":66},"motorsport championships, teams, circuits, and events",{"type":59,"value":68}," — from raw HTML scraping to a normalized SQLite database and an interactive Streamlit dashboard.",{"type":54,"tag":55,"props":70,"children":71},{},[72],{"type":59,"value":73},"The project is structured around real data engineering principles: modular scrapers, a cleaning pipeline, schema-driven storage, and a visualization layer on top.",{"type":54,"tag":75,"props":76,"children":78},"h2",{"id":77},"goals",[79],{"type":59,"value":80},"Goals",{"type":54,"tag":82,"props":83,"children":84},"ul",{},[85,91,96,101],{"type":54,"tag":86,"props":87,"children":88},"li",{},[89],{"type":59,"value":90},"Automate the collection of motorsport data from public web sources (primarily Wikipedia).",{"type":54,"tag":86,"props":92,"children":93},{},[94],{"type":59,"value":95},"Build a clean, normalized relational database from heterogeneous HTML tables.",{"type":54,"tag":86,"props":97,"children":98},{},[99],{"type":59,"value":100},"Deliver an interactive dashboard for exploring championships, circuits, and teams.",{"type":54,"tag":86,"props":102,"children":103},{},[104,106,113,115,121],{"type":59,"value":105},"Practice good scraping hygiene: respecting ",{"type":54,"tag":107,"props":108,"children":110},"code",{"className":109},[],[111],{"type":59,"value":112},"robots.txt",{"type":59,"value":114},", adding request delays, using identifiable ",{"type":54,"tag":107,"props":116,"children":118},{"className":117},[],[119],{"type":59,"value":120},"User-Agent",{"type":59,"value":122}," headers.",{"type":54,"tag":75,"props":124,"children":126},{"id":125},"architecture",[127],{"type":59,"value":128},"Architecture",{"type":54,"tag":130,"props":131,"children":133},"pre",{"code":132},"scrapers/     → HTTP extraction (Wikipedia wikitables)\npipelines/    → Cleaning, normalization, orchestration\ndatabase/     → SQL schema + SQLite file (motorsport.db)\ndata/raw/     → Unversioned raw CSVs from scrapers\ndata/processed/ → Normalized CSVs ready for DB load\nnotebooks/    → Exploratory analysis\ndashboard/    → Streamlit app\n",[134],{"type":54,"tag":107,"props":135,"children":136},{"__ignoreMap":5},[137],{"type":59,"value":132},{"type":54,"tag":75,"props":139,"children":141},{"id":140},"etl-pipeline",[142],{"type":59,"value":143},"ETL Pipeline",{"type":54,"tag":55,"props":145,"children":146},{},[147,149,155],{"type":59,"value":148},"The pipeline is orchestrated by a single entry point (",{"type":54,"tag":107,"props":150,"children":152},{"className":151},[],[153],{"type":59,"value":154},"build_dataset.py",{"type":59,"value":156},") that accepts CLI flags for partial runs:",{"type":54,"tag":130,"props":158,"children":162},{"code":159,"language":160,"meta":5,"className":161,"style":5},"# Full pipeline\npython pipelines/build_dataset.py\n\n# Only Wikipedia championship data\npython pipelines/build_dataset.py --only wiki\n\n# Skip teams (faster iteration)\npython pipelines/build_dataset.py --skip-teams\n","bash","language-bash shiki shiki-themes material-theme-lighter material-theme material-theme-palenight",[163],{"type":54,"tag":107,"props":164,"children":165},{"__ignoreMap":5},[166,178,194,203,212,235,243,252],{"type":54,"tag":167,"props":168,"children":171},"span",{"class":169,"line":170},"line",1,[172],{"type":54,"tag":167,"props":173,"children":175},{"style":174},"--shiki-light:#90A4AE;--shiki-default:#546E7A;--shiki-dark:#676E95;--shiki-light-font-style:italic;--shiki-default-font-style:italic;--shiki-dark-font-style:italic",[176],{"type":59,"value":177},"# Full pipeline\n",{"type":54,"tag":167,"props":179,"children":181},{"class":169,"line":180},2,[182,188],{"type":54,"tag":167,"props":183,"children":185},{"style":184},"--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B",[186],{"type":59,"value":187},"python",{"type":54,"tag":167,"props":189,"children":191},{"style":190},"--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D",[192],{"type":59,"value":193}," pipelines/build_dataset.py\n",{"type":54,"tag":167,"props":195,"children":197},{"class":169,"line":196},3,[198],{"type":54,"tag":167,"props":199,"children":200},{"emptyLinePlaceholder":12},[201],{"type":59,"value":202},"\n",{"type":54,"tag":167,"props":204,"children":206},{"class":169,"line":205},4,[207],{"type":54,"tag":167,"props":208,"children":209},{"style":174},[210],{"type":59,"value":211},"# Only Wikipedia championship data\n",{"type":54,"tag":167,"props":213,"children":215},{"class":169,"line":214},5,[216,220,225,230],{"type":54,"tag":167,"props":217,"children":218},{"style":184},[219],{"type":59,"value":187},{"type":54,"tag":167,"props":221,"children":222},{"style":190},[223],{"type":59,"value":224}," pipelines/build_dataset.py",{"type":54,"tag":167,"props":226,"children":227},{"style":190},[228],{"type":59,"value":229}," --only",{"type":54,"tag":167,"props":231,"children":232},{"style":190},[233],{"type":59,"value":234}," wiki\n",{"type":54,"tag":167,"props":236,"children":238},{"class":169,"line":237},6,[239],{"type":54,"tag":167,"props":240,"children":241},{"emptyLinePlaceholder":12},[242],{"type":59,"value":202},{"type":54,"tag":167,"props":244,"children":246},{"class":169,"line":245},7,[247],{"type":54,"tag":167,"props":248,"children":249},{"style":174},[250],{"type":59,"value":251},"# Skip teams (faster iteration)\n",{"type":54,"tag":167,"props":253,"children":255},{"class":169,"line":254},8,[256,260,264],{"type":54,"tag":167,"props":257,"children":258},{"style":184},[259],{"type":59,"value":187},{"type":54,"tag":167,"props":261,"children":262},{"style":190},[263],{"type":59,"value":224},{"type":54,"tag":167,"props":265,"children":266},{"style":190},[267],{"type":59,"value":268}," --skip-teams\n",{"type":54,"tag":55,"props":270,"children":271},{},[272,274,280],{"type":59,"value":273},"Each scraper targets a specific Wikipedia article structure (e.g., \"List of … series\" pages exposing ",{"type":54,"tag":107,"props":275,"children":277},{"className":276},[],[278],{"type":59,"value":279},"wikitable",{"type":59,"value":281}," HTML). The cleaning layer normalizes column names, deduplicates rows, fixes encoding issues, and outputs consistent CSVs.",{"type":54,"tag":75,"props":283,"children":285},{"id":284},"database-schema",[286],{"type":59,"value":287},"Database Schema",{"type":54,"tag":55,"props":289,"children":290},{},[291],{"type":59,"value":292},"The SQLite database covers four main entities:",{"type":54,"tag":82,"props":294,"children":295},{},[296,307,318,329],{"type":54,"tag":86,"props":297,"children":298},{},[299,305],{"type":54,"tag":107,"props":300,"children":302},{"className":301},[],[303],{"type":59,"value":304},"championships",{"type":59,"value":306}," — name, type, founding year, governing body",{"type":54,"tag":86,"props":308,"children":309},{},[310,316],{"type":54,"tag":107,"props":311,"children":313},{"className":312},[],[314],{"type":59,"value":315},"circuits",{"type":59,"value":317}," — name, location, country, length",{"type":54,"tag":86,"props":319,"children":320},{},[321,327],{"type":54,"tag":107,"props":322,"children":324},{"className":323},[],[325],{"type":59,"value":326},"teams",{"type":59,"value":328}," — constructor, nationality, active seasons",{"type":54,"tag":86,"props":330,"children":331},{},[332,338],{"type":54,"tag":107,"props":333,"children":335},{"className":334},[],[336],{"type":59,"value":337},"events",{"type":59,"value":339}," — calendar placeholder (extended in future versions)",{"type":54,"tag":55,"props":341,"children":342},{},[343],{"type":59,"value":344},"All tables are replaced on each pipeline run to ensure reproducibility.",{"type":54,"tag":75,"props":346,"children":348},{"id":347},"streamlit-dashboard",[349],{"type":59,"value":350},"Streamlit Dashboard",{"type":54,"tag":55,"props":352,"children":353},{},[354],{"type":59,"value":355},"The dashboard provides an interactive interface to explore the loaded data:",{"type":54,"tag":82,"props":357,"children":358},{},[359,364,369],{"type":54,"tag":86,"props":360,"children":361},{},[362],{"type":59,"value":363},"Championship browser with filter by type (circuit racing, rallying, karting…)",{"type":54,"tag":86,"props":365,"children":366},{},[367],{"type":59,"value":368},"Circuit map view by country",{"type":54,"tag":86,"props":370,"children":371},{},[372],{"type":59,"value":373},"Team directory with season activity ranges",{"type":54,"tag":75,"props":375,"children":377},{"id":376},"tools-technologies",[378],{"type":59,"value":379},"Tools & Technologies",{"type":54,"tag":82,"props":381,"children":382},{},[383,409,425,442,452,462],{"type":54,"tag":86,"props":384,"children":385},{},[386,391,393,399,401,407],{"type":54,"tag":62,"props":387,"children":388},{},[389],{"type":59,"value":390},"Scraping",{"type":59,"value":392},": Python (",{"type":54,"tag":107,"props":394,"children":396},{"className":395},[],[397],{"type":59,"value":398},"requests",{"type":59,"value":400},", ",{"type":54,"tag":107,"props":402,"children":404},{"className":403},[],[405],{"type":59,"value":406},"BeautifulSoup",{"type":59,"value":408},")",{"type":54,"tag":86,"props":410,"children":411},{},[412,417,419],{"type":54,"tag":62,"props":413,"children":414},{},[415],{"type":59,"value":416},"Cleaning",{"type":59,"value":418},": ",{"type":54,"tag":107,"props":420,"children":422},{"className":421},[],[423],{"type":59,"value":424},"pandas",{"type":54,"tag":86,"props":426,"children":427},{},[428,433,435,441],{"type":54,"tag":62,"props":429,"children":430},{},[431],{"type":59,"value":432},"Storage",{"type":59,"value":434},": SQLite (",{"type":54,"tag":107,"props":436,"children":438},{"className":437},[],[439],{"type":59,"value":440},"sqlite3",{"type":59,"value":408},{"type":54,"tag":86,"props":443,"children":444},{},[445,450],{"type":54,"tag":62,"props":446,"children":447},{},[448],{"type":59,"value":449},"Dashboard",{"type":59,"value":451},": Streamlit",{"type":54,"tag":86,"props":453,"children":454},{},[455,460],{"type":54,"tag":62,"props":456,"children":457},{},[458],{"type":59,"value":459},"CI/CD",{"type":59,"value":461},": GitHub Actions (linting + pipeline smoke test)",{"type":54,"tag":86,"props":463,"children":464},{},[465,470],{"type":54,"tag":62,"props":466,"children":467},{},[468],{"type":59,"value":469},"Version control",{"type":59,"value":471},": Git + GitHub",{"type":54,"tag":75,"props":473,"children":475},{"id":474},"project-links",[476],{"type":59,"value":477},"Project Links",{"type":54,"tag":82,"props":479,"children":480},{},[481],{"type":54,"tag":86,"props":482,"children":483},{},[484,486],{"type":59,"value":485},"GitHub repository: ",{"type":54,"tag":487,"props":488,"children":492},"a",{"href":489,"rel":490},"https://github.com/PierreGR7/motorsport-championships-scrapping",[491],"nofollow",[493],{"type":59,"value":489},{"type":54,"tag":495,"props":496,"children":497},"style",{},[498],{"type":59,"value":499},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":5,"searchDepth":180,"depth":180,"links":501},[502,503,504,505,506,507,508],{"id":77,"depth":180,"text":80},{"id":125,"depth":180,"text":128},{"id":140,"depth":180,"text":143},{"id":284,"depth":180,"text":287},{"id":347,"depth":180,"text":350},{"id":376,"depth":180,"text":379},{"id":474,"depth":180,"text":477},"markdown","content:projects:motorsport-pipeline.md","projects/motorsport-pipeline.md","projects/motorsport-pipeline","md",[515,516],null,{"_path":517,"_dir":40,"_draft":6,"_partial":6,"_locale":5,"title":518,"description":519,"authors":520,"image":523,"date":525,"_type":509,"_id":526,"_source":34,"_file":527,"_stem":528,"_extension":513},"/projects/market-pulse","MarketPulse API","A Dockerized Financial Microservice in Python",[521],{"name":10,"to":5,"avatar":522},{"src":46},{"src":524},"/img/article_marketpulse/marketpulse.jpg","2025-10-31T01:00:00.000Z","content:projects:market-pulse.md","projects/market-pulse.md","projects/market-pulse",1779746236773]