Usuari:TronaBot/Python/segonallegida.py

De la Viquipèdia, l'enciclopèdia lliure
#!/usr/bin/python
# -*- coding: utf-8  -*-
import os, sys, re, codecs
prevfolder = os.path.split(os.getcwd())[0]
sys.path.append(os.path.split(prevfolder)[0])
import wikipedia as pywikilib, pagegenerators as pg

# Transliteration table used to build alphabetical sort keys.
# Each key is a string of lower-case characters that all collapse to the same
# plain replacement (the value). Lookups happen after the word has been
# lower-cased, so only lower-case forms can ever match.
pairs = {
	u"àáâäãăǎąåā": "a", u'æǣ': "ae",
	u'ḃɓ': "b",
	u'çćčĉċ': "c",
	u'đḍďḋ': "d", u"ð": "dz",
	u'èéêëẽēę': "e",
	u'ḟƒ': "f",
	u'ĝġģğ': "g",
	u'ĥħ': "h",
	# U+0133 is the single-character "ij" ligature. Writing it as the two
	# ASCII letters would register plain "i" and "j" as diacritics.
	u'ìíîïīį': "i", u'\u0133': "ij",
	u'ĵ': "j",
	u'ķ': "k",
	u'ŀļḷḹľł': "l",
	u'ñńň': "n",
	u'òóôöõøōǫ': "o",
	u'œ': "oe",
	u'ṗ': "p",
	u'ŗřṛṝ': "r",
	u'şṡšŝ': "s", u'ß': "sz",
	u'ţṫṭ': "t",
	# Lower-case thorn is the form that is actually looked up; the upper-case
	# one is kept so nothing that used to be in the table disappears.
	u'þÞ': "tz",
	u'ùúûüŭūų': "u",
	u'ẁŵẅƿ': "w",
	u'ýỳŷÿȳỹ': "y",
	u'źžż': "z"
}
# Every mapped character in one string, for a cheap "needs replacing?" test.
diacritics = "".join(pairs.keys())
def simplify_chars(word):
	"""Return a simplified form of *word* for alphabetical ordering.

	The word is lower-cased, every character listed in ``pairs`` is replaced
	by its plain equivalent, ``l·l`` is folded to ``ll`` and every remaining
	non-word character is replaced by ``!``.

	word -- the text to simplify.
	Returns the simplified string.
	"""
	word = word.lower()
	simplified = []
	# Map each character exactly once. A whole-string replace() per character
	# would re-expand a replacement that itself contains a mapped character.
	for ch in word:
		replacement = ch
		if ch in diacritics:
			for keys in pairs:
				if ch in keys:
					replacement = pairs[keys]
					break
		simplified.append(replacement)
	word = "".join(simplified)
	word = word.replace(u"l·l", "ll")
	# Whatever \W matches (spaces, punctuation, ...) becomes "!", which sorts
	# before every ASCII digit and letter.
	word = re.sub(r"\W", "!", word)
	return word

def sort_list(old_list):
	"""Return the items of *old_list* as a new list in alphabetical order.

	Items are compared by their simplify_chars() form, so case and the mapped
	diacritics do not affect the order. Items whose simplified forms are
	equal are all kept (in their input order); keying a dict by the simplified
	form would silently drop all but one of them.

	old_list -- iterable of strings.
	Returns a new sorted list.
	"""
	return sorted(old_list, key=simplify_chars)

def get_referred_page(page):
	"""Return a generator of namespace-0 pages that transclude *page*.

	page -- title of the page (used here with a template title).

	Relies on the module-level ``site`` being bound before the call.
	"""
	template = pywikilib.Page(site, page)
	# Only pages that include the template, not pages that merely link to it.
	transcluding = pg.ReferringPageGenerator(template, onlyTemplateInclusion=True)
	# Keep namespace 0 only.
	# NOTE(review): the result could be wrapped in
	# pg.PreloadingGenerator(..., pageNumber=50); it is currently returned as is.
	return pg.NamespaceFilterPageGenerator(transcluding, 0)

def _write_log(filename, lines):
	"""Write every item of *lines* to *filename* as UTF-8, one per row."""
	log = codecs.open(filename, "w", "utf8")
	try:
		for line in lines:
			log.write(u"%s\n" % line)
	finally:
		# Close the handle even if a write fails.
		log.close()


def main():
	"""Build two reports about the pages transcluding "Template:Segona llegida".

	For every such page the length of its text is recorded, then:

	* ``segonallegida-ordreafabètic.log`` gets one "<title> <size> O." row per
	  page in alphabetical order (each row is also echoed via pywikilib.output);
	* ``segonallegida-ordremida.log`` gets one "<title> <size> c." row per
	  page, largest size first.

	Finally the number of pages and the number of rows read back from the
	size log are written to stdout; the two figures are expected to match.
	"""
	# title -> length of the page text
	pages = {}
	for page in get_referred_page("Template:Segona llegida"):
		pages[page.title()] = len(page.get())

	# Alphabetical report.
	alphabetical = []
	for title in sort_list(pages.keys()):
		line = u"%s %i O." % (title, pages[title])
		pywikilib.output(line)
		alphabetical.append(line)
	_write_log(u"segonallegida-ordreafabètic.log", alphabetical)

	# Group the titles by size in a single pass.
	pages_by_size = {}
	for title, size in pages.items():
		pages_by_size.setdefault(size, []).append(title)

	# Size report: every title gets its own complete row, including titles
	# that share a size with others.
	by_size = []
	for size in sorted(pages_by_size, reverse=True):
		for title in pages_by_size[size]:
			by_size.append(u"%s %i c." % (title, size))
	_write_log("segonallegida-ordremida.log", by_size)

	# Sanity check: read the size log back and compare the row count with the
	# number of pages collected.
	log = codecs.open(u"segonallegida-ordremida.log", "r", "utf8")
	try:
		written = len(log.readlines())
	finally:
		log.close()
	sys.stdout.write("%i %i\n" % (len(pages), written))


if __name__ == "__main__":
	# get_referred_page() reads the module-level ``site``, so it has to be
	# bound before main() runs.
	site = pywikilib.getSite("ca", "wikipedia")
	main()