Vés al contingut

Usuari:CobainBot/scripts/cawiki/disambig.py

De la Viquipèdia, l'enciclopèdia lliure
from typing import List

import pywikibot
from pywikibot import pagegenerators as pg, Page
import datetime
from multiprocessing.pool import ThreadPool
from multiprocessing import Lock

# Shared pywikibot site object for the 'ca' language code of the 'wikipedia'
# family, created for the 'CobainBot' user. It is module-level because both
# DisambigCounter.count() and the script entry point below refer to it.
site = pywikibot.Site('ca', 'wikipedia', 'CobainBot')


class ExtendedPage(Page):
    """A ``Page`` that additionally remembers how many backlinks were counted.

    The counter starts at zero and is expected to be filled in by whoever
    performs the count.
    """

    def __init__(self, page):
        """Build the page from *page* (forwarded unchanged to ``Page``)."""
        super().__init__(page)
        # Number of backlinks recorded for this page; 0 until assigned.
        self.backlinks_counter: int = 0


class DisambigCounter:
    """Count backlinks to disambiguation pages and write a ranked report.

    Every page yielded for the disambiguation category is inspected by a
    pool of worker threads. Pages whose number of namespace-0 backlinks
    reaches ``min_backlinks`` are collected, sorted by that number in
    descending order and written as a wikitext numbered list.
    """

    def __init__(self, min_backlinks: int = 5, workers: int = 20,
                 output_path: str = 'resources/llista_desambiguacions.txt'):
        """Create an empty counter.

        Args:
            min_backlinks: Minimum number of backlinks a page needs in order
                to appear in the report. The default of 5 matches the
                previous hard-coded ``> 4`` test.
            workers: Number of threads used to query backlinks.
            output_path: File the report is written to by :meth:`write`.
        """
        self.min_backlinks = min_backlinks
        self.workers = workers
        self.output_path = output_path
        # Pages that reached the threshold, each carrying backlinks_counter.
        self.disambig_list: List[ExtendedPage] = []
        # Sum of backlinks over every processed page, listed or not.
        self.total_links = 0
        # Highest "thousands of links" mark already reported by verbose().
        self.last_seen = 0
        # Guards the three attributes above, which worker threads update.
        self.lock = Lock()

    def pool_handler(self, page):
        """Count the namespace-0 backlinks of *page* and record the result.

        Runs in a worker thread. The backlink query and the construction of
        the ``ExtendedPage`` wrapper happen outside the lock so that workers
        only serialise on the short bookkeeping section.
        """
        # Stream the generator instead of materialising every backlink page.
        backlinks = sum(1 for _ in page.backlinks(namespaces=0))

        # Only pages that will be listed need the wrapper object.
        entry = None
        if backlinks >= self.min_backlinks:
            entry = ExtendedPage(page)
            entry.backlinks_counter = backlinks

        with self.lock:
            self.total_links += backlinks
            if entry is not None:
                self.disambig_list.append(entry)
            # Print progress each time the running total passes a new
            # multiple of 1000 links.
            thousand = self.total_links // 1000
            if thousand > self.last_seen:
                self.last_seen = thousand
                self.verbose()

    def count(self):
        """Process every page of the disambiguation category (recursively)."""
        disambig_cat = pywikibot.Category(site, 'Categoria:Pàgines de desambiguació')
        disambig_pages = pg.CategorizedPageGenerator(disambig_cat, recurse=True)
        # map() blocks until every page is handled; the context manager then
        # shuts the worker threads down instead of leaking them.
        with ThreadPool(self.workers) as pool:
            pool.map(self.pool_handler, disambig_pages)

    def to_string(self):
        """Return the report as wikitext.

        The first line carries today's date and the total number of links;
        it is followed by one numbered-list item per collected page, in the
        current order of ``disambig_list``.
        """
        result = f"Actualització de {{{{data|{datetime.date.today()}}}}}: {self.total_links} " \
                 f"→ Vegeu una [[#Eina per desambiguar]]\n"
        result += '\n'.join(
            f'# [[{page.title()}]]: {page.backlinks_counter} '
            f'[[Special:Whatlinkshere/{page.title()}|enllaços]]' for page in self.disambig_list
        )
        return result

    def write(self):
        """Write the report to ``output_path`` as UTF-8, replacing any old file."""
        import os  # local import: only this method needs it

        # Create the target directory first so that a long crawl is not lost
        # to a missing folder at the very last step.
        directory = os.path.dirname(self.output_path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(self.output_path, 'w', encoding='utf8') as fp:
            fp.write(self.to_string())

    def verbose(self, item=''):
        """Print a timestamped progress line with the running link total.

        Args:
            item: Optional label inserted before the ``items:`` counter.
        """
        if item:
            item = f'{item} '
        print(f"[{datetime.datetime.now():%H:%M:%S}] {item}items: {self.total_links}")

    def run(self):
        """Count, sort by backlinks (descending) and write the report."""
        self.verbose('init time.')
        self.count()
        self.disambig_list.sort(key=lambda page: page.backlinks_counter, reverse=True)
        self.write()
        self.verbose('end time.')


if __name__ == '__main__':
    # Log the bot account in first, then run one full count-and-report cycle.
    site.login()
    DisambigCounter().run()