% Journal article on a Kafka/ClickHouse pipeline for Wikipedia edit streams.
% NOTE(review): the repository export carried `series = {informatikJournal}` and
% `volume = {12.2021}`; these are interpreted here as journal name, volume 12,
% and year 2021 -- confirm against the publisher record.
@article{TraerisBalsacq,
  author   = {Tr{\"a}ris, Tim and Balsacq, Maxim},
  title    = {Leveraging {Wikipedia} Page Edits for Analytical Processing},
  journal  = {informatikJournal},
  volume   = {12},
  year     = {2021},
  pages    = {63--68},
  url      = {https://nbn-resolving.org/urn:nbn:de:bsz:fn1-opus4-77135},
  abstract = {Wikipedia is the largest free encyclopedia and one of the most popular websites worldwide. Analyzing user activity within this encyclopedic ecosystem represents unique opportunities for academic research and analysis. For this reason, this work is fundamentally concerned with obtaining and processing real-time article edit streams from Wikipedia. In this regard, we leverage the Wikimedia EventStreams API and propose a general-purpose event pipeline allowing for further processing of observed page edits. In the suggested pipeline, events are ingested and transported via an Apache Kafka cluster and inserted into a ClickHouse database for storage and analysis. Finally, we confirm the viability of our design by exploring several exemplary analytical use cases.},
  language = {en},
}