Usuari:CobainBot/scripts/cawiki/disambig.py
Aparença
from typing import List
import pywikibot
from pywikibot import pagegenerators as pg, Page
import datetime
from multiprocessing.pool import ThreadPool
from multiprocessing import Lock
# Module-wide pywikibot site handle, shared by every function below:
# language code 'ca', family 'wikipedia', acting as the 'CobainBot' account.
site = pywikibot.Site(code='ca', fam='wikipedia', user='CobainBot')
class ExtendedPage(Page):
    """A pywikibot ``Page`` that additionally carries a backlink tally.

    The tally is stored in ``backlinks_counter``; it starts at 0 and is
    expected to be assigned by whoever counts the backlinks.
    """

    def __init__(self, page):
        """Build the page from ``page`` and reset the backlink tally to 0."""
        super().__init__(page)
        self.backlinks_counter = 0
class DisambigCounter:
    """Count namespace-0 backlinks of disambiguation pages and write a report.

    Pages are taken (recursively) from the category
    'Categoria:Pàgines de desambiguació'. Their backlinks are counted
    concurrently in a thread pool; pages with more than
    ``BACKLINK_THRESHOLD`` backlinks are collected, sorted by descending
    count and written as a wikitext numbered list to ``OUTPUT_PATH``.
    """

    # A page is listed only when it has strictly more backlinks than this.
    BACKLINK_THRESHOLD = 4
    # Number of worker threads used to query backlinks concurrently.
    POOL_SIZE = 20
    # A progress line is printed whenever the running total of backlinks
    # reaches a new multiple of this value.
    PROGRESS_STEP = 1000
    # Destination of the generated report, relative to the working directory.
    OUTPUT_PATH = 'resources/llista_desambiguacions.txt'

    def __init__(self):
        """Start with an empty result list and zeroed counters."""
        # Pages that exceeded BACKLINK_THRESHOLD, each carrying its count.
        self.disambig_list: List[ExtendedPage] = []
        # Sum of backlinks over every page processed so far.
        self.total_links = 0
        # Highest multiple of PROGRESS_STEP already reported by verbose().
        self.last_seen = 0
        # Guards the three attributes above against concurrent workers.
        self.lock = Lock()

    def pool_handler(self, page):
        """Count the namespace-0 backlinks of ``page`` and record the result.

        Runs in a worker thread. The shared totals are only touched while
        holding ``self.lock``.
        """
        # Count lazily: the backlinks themselves are never needed, so there
        # is no reason to materialise them all in a list first.
        backlinks = sum(1 for _ in page.backlinks(namespaces=0))

        # Build the wrapper outside the lock, and only for pages that will
        # actually be listed, to keep the critical section short.
        listed = None
        if backlinks > self.BACKLINK_THRESHOLD:
            listed = ExtendedPage(page)
            listed.backlinks_counter = backlinks

        with self.lock:
            self.total_links += backlinks
            if listed is not None:
                self.disambig_list.append(listed)
            thousand = self.total_links // self.PROGRESS_STEP
            if thousand > self.last_seen:
                self.last_seen = thousand
                self.verbose()

    def count(self):
        """Process every page of the disambiguation category in a thread pool."""
        disambig_cat = pywikibot.Category(site, u'Categoria:Pàgines de desambiguació')
        disambig_pages = pg.CategorizedPageGenerator(disambig_cat, recurse=True)
        # map() blocks until every page has been handled, so leaving the
        # ``with`` block afterwards only releases the worker threads, which
        # were previously never shut down.
        with ThreadPool(self.POOL_SIZE) as pool:
            pool.map(self.pool_handler, disambig_pages)

    def to_string(self):
        """Return the report as wikitext.

        The first line holds today's date and the total number of backlinks;
        it is followed by one numbered-list item per collected page, in the
        current order of ``self.disambig_list``.
        """
        header = (
            f"Actualització de {{{{data|{datetime.date.today()}}}}}: {self.total_links} "
            f"→ Vegeu una [[#Eina per desambiguar]]\n"
        )
        rows = '\n'.join(
            f'# [[{page.title()}]]: {page.backlinks_counter} '
            f'[[Special:Whatlinkshere/{page.title()}|enllaços]]'
            for page in self.disambig_list
        )
        return header + rows

    def write(self):
        """Write the report to ``OUTPUT_PATH`` as UTF-8, replacing any old file."""
        with open(self.OUTPUT_PATH, 'w', encoding='utf8') as fp:
            fp.write(self.to_string())

    def verbose(self, item=''):
        """Print a timestamped progress line with the running backlink total.

        :param item: optional label placed before the total.
        """
        if item:
            item = f'{item} '
        print(f"[{datetime.datetime.now():%H:%M:%S}] {item}items: {self.total_links}")

    def run(self):
        """Count, sort by descending backlink count, and write the report."""
        self.verbose('init time.')
        self.count()
        self.disambig_list.sort(key=lambda page: page.backlinks_counter, reverse=True)
        self.write()
        self.verbose('end time.')
if __name__ == '__main__':
    # Log in on the shared site first, then build and save the report.
    site.login()
    DisambigCounter().run()