Usuari:TronaBot/Python/segonallegida.py: diferència entre les revisions
Aparença
Contingut suprimit Contingut afegit
Cap resum de modificació |
calia treure els duplicats |
||
Línia 74: | Línia 74: | ||
pages_by_size={} |
pages_by_size={} |
||
for size in sorted(pages.values()): |
for size in sorted(set(pages.values())): |
||
for title in pages: |
for title in pages: |
||
if pages_by_size.has_key(size): |
if pages_by_size.has_key(size): |
Revisió del 22:17, 17 març 2013
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os, sys, re, codecs
prevfolder = os.path.split(os.getcwd())[0]
sys.path.append(os.path.split(prevfolder)[0])
import wikipedia as pywikilib, pagegenerators as pg
# Transliteration table used to build sort keys.  Each key is a string of
# lower-case characters; every character in it is replaced by the value.
# All keys are non-ASCII and all values are plain ASCII, so the order in
# which the groups are applied does not matter.
pairs={
    u"àáâäãăǎąåā": "a", u'æǣ': "ae",
    u'ḃɓ': "b",
    u'çćčĉċ': "c",
    u'đḍďḋ': "d", u"ð": "dz",
    u'èéêëẽēę': "e",
    u'ḟƒ': "f",
    u'ĝġģğ': "g",
    u'ĥħ': "h",
    # U+0133 is the single-character "ij" ligature.  A two-letter u'ij' key
    # would instead rewrite every plain "i" and "j" in the word.
    u'ìíîïīį': "i", u"\u0133": "ij",
    u'ĵ': "j",
    u'ķ': "k",
    u'ŀļḷḹľł': "l",
    u'ñńň': "n",
    u'òóôöõøōǫ': "o",
    u'œ': "oe",
    u'ṗ': "p",
    u'ŗřṛṝ': "r",
    u'şṡšŝ': "s", u'ß': "sz",
    u'ţṫṭ': "t",
    # Words are lower-cased before lookup, so the lower-case thorn is the
    # form that actually matches; the upper-case one is kept for reference.
    u'þÞ': "tz",
    u'ùúûüŭūų': "u",
    u'ẁŵẅƿ': "w",
    u'ýỳŷÿȳỹ': "y",
    u'źžż': "z"
}

# Every character that has an entry in the table, as one string.
diacritics = "".join(pairs.keys())

# Anything outside [a-zA-Z0-9_].  Spelled out explicitly so the result does
# not depend on whether the interpreter treats \W as ASCII-only or Unicode.
_NOT_ASCII_WORD = re.compile(r"[^a-zA-Z0-9_]")


def simplify_chars(word):
    """Return a lower-case, ASCII-only key used to sort *word* alphabetically.

    The word is lower-cased, every character listed in ``pairs`` is replaced
    by its ASCII transliteration, "l·l" is collapsed to "ll", and each
    remaining character outside ``[a-zA-Z0-9_]`` (spaces, punctuation,
    letters missing from the table) becomes "!".

    word -- the text to simplify; expected to be a unicode string.
    """
    word = word.lower()
    for chars, replacement in pairs.items():
        for char in chars:
            word = word.replace(char, replacement)
    # Drop the middle dot of "l·l" here so it is not turned into "!" below.
    word = word.replace(u"l·l", "ll")
    return _NOT_ASCII_WORD.sub("!", word)
def sort_list(old_list):
    """Return a new list with the items of *old_list* in alphabetical order.

    Items are compared by the key that ``simplify_chars`` builds for them,
    so accented and unaccented spellings sort together.  Every input item
    appears in the result: items whose keys are equal are all kept, in the
    order they had in *old_list*, instead of overwriting one another.

    old_list -- any iterable of unicode strings.
    """
    return sorted(old_list, key=simplify_chars)
def get_referred_page(page):
    """Return a generator of the namespace-0 pages that transclude *page*.

    page -- title of the page (a template, in this script) whose
            transclusions are wanted.

    Relies on the module-level ``site`` being set before it is called.
    """
    template = pywikilib.Page(site, page)
    transcluding = pg.ReferringPageGenerator(template, onlyTemplateInclusion=True)
    # Keep namespace 0 only.  The original left a PreloadingGenerator
    # wrapper (pageNumber=50) disabled here, so none is applied.
    return pg.NamespaceFilterPageGenerator(transcluding, 0)
def main():
    """Write two reports on the pages that use "Template:Segona llegida".

    For every page yielded by ``get_referred_page`` the length of
    ``page.get()`` is recorded, then two log files are written in the
    current directory:

    * an alphabetical listing, one "<title> <length> O." line per page,
      also echoed through ``pywikilib.output``;
    * a listing by decreasing length, one "<title> <length> c." line per
      page.

    Finally the number of lines written to the second log and the number of
    lines read back from it are printed so the two can be compared.
    """
    pages = {}
    for page in get_referred_page("Template:Segona llegida"):
        pages[page.title()] = len(page.get())

    # Alphabetical report.
    with codecs.open(u"segonallegida-ordreafabètic.log", "w", "utf8") as f:
        for title in sort_list(pages.keys()):
            line = u"%s %i O." % (title, pages[title])
            pywikilib.output(line)
            f.write(u"%s\n" % line)

    # Group the titles by length in a single pass over the dict.
    pages_by_size = {}
    for title, size in pages.items():
        pages_by_size.setdefault(size, []).append(title)

    # Report by decreasing length.  Every title gets its own complete line,
    # including the " c." suffix, even when several pages share a length.
    written = 0
    with codecs.open("segonallegida-ordremida.log", "w", "utf8") as f:
        for size in sorted(pages_by_size, reverse=True):
            for title in pages_by_size[size]:
                f.write(u"%s %i c.\n" % (title, size))
                written += 1

    # Sanity check: lines written versus lines read back.  The parenthesised
    # single-string form prints the same text under Python 2 and Python 3.
    with codecs.open(u"segonallegida-ordremida.log", "r", "utf8") as f:
        print("%i %i" % (written, len(f.readlines())))
if __name__ == "__main__":
    # get_referred_page() reads this module-level ``site``, so it has to be
    # bound before main() is called.
    site = pywikilib.getSite("ca", "wikipedia")
    main()