Usuari:TronaBot/Python/els1000.py

De la Viquipèdia, l'enciclopèdia lliure

Aquest mòdul necessita el mòdul common.py per a executar-se.

Hi ha una versió actualitzada al toolserver.

#! /usr/bin/python2.7
# -*- coding: utf-8 -*-
#$ -j y
#$ -o /home/pasqual/public_html/pywikimedia/usrlab/logs
#$ -m as

# Copyleft (!C) 2013 w:ca:Coet
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
    Implemented by w:ca:user:Coet

    This script pretends to update the wikitable for the 1000 articles project.
	(https://meta.wikimedia.org/wiki/List_of_articles_every_Wikipedia_should_have)

"""
# Metadata consumed by the bot framework's module loader (see common.py):
importable = False  # this script is meant to be run directly, not imported
load_as = ""
help=""
access=["botop","staff","runop"]  # user groups allowed to trigger this module

#python modules
import codecs as cs
import os, re, sys, time, UserDict
from datetime import datetime
from platform import system as platfsys

# My scripts live in a folder at the same level as the pywikipedia
# folder, so both library folders must be appended to sys.path before
# the pywikipedia / user modules below can be imported.
from platform import system as platfsys
on_win = platfsys().lower() == "windows"
if on_win:
	home = r"E:\\iShare\SugarSync\My Python scripts"
else:
	home = "/home/pasqual/public_html/"
for folder in ("pywikilib", "pyuserlib"):
	sys.path.append(os.path.join(home, folder))

#pywikipedia
import query as api
import wikipedia as pywikilib
import pagegenerators as pg
from config import usernames as logged_in_as

#user scripts
from func import format_number as fmt
from common import ArgumentHandler, Chrono, File
from common import blue, sort_list, yellow

class Article(pywikilib.Page):
	"""wikipedia.Page subclass adding MediaWiki-API helpers (red links,
	backlinks, transclusions) and snake_case aliases for Page methods."""

	def __init__(self, article, site=None):
		"""
		This is a subclass of wikipedia.Page object.
		It contains some instances that aren't available with wikipedia.Page object.
		But also aliasing of instances that agree the Python conventions about naming
		variables and functions.

		article: an existing Page/Article object, or a title string.
		site: target wiki; defaults to ca.wikipedia when a title string is given.
		"""
		if isinstance(article, (pywikilib.Page, Article)):
			# Copy construction: reuse the wrapped page's site and title.
			site = article.site()
			title = article.title()
		elif isinstance(article, basestring):
			title = article
			if not site:
				site = pywikilib.getSite("ca","wikipedia")

		pywikilib.Page.__init__(self, site, title)
		self.wikidata = pywikilib.DataPage(self)  # linked Wikidata item page
		self._content = None  # placeholder for content caching (unused by fetch())

	def interwikis(self):
		"""return a list of local and global interwikies"""
		local_iw = []
		# Interwiki links embedded in the page's own wikitext.
		for iw in self.interwiki():
			local_iw.append(Article(iw, iw.site()))
		global_iw = []
		# Sitelinks taken from the Wikidata item.
		for iw in self.wikidata.interwiki():
			global_iw.append(Article(iw, iw.site()))
		return local_iw + global_iw

	def get_interwiki(self, lang):
		"""return an Article object if a given lang interwiki exists for the Article"""
		for iw in self.interwikis():
			if iw.site().language() == lang:
				return Article(iw, iw.site())
		return None

	def fetch(self):
		"""get the content of the article if exists, else empty string ('')"""
		#new_text = re.sub(ur"(\[\[[a-z\-]{2,12}:[^\]]+?\]\]\s*)", "", text, re.M|re.S)
		if self.exists():
			# Follow redirects so a redirect page still yields text.
			text = self.get(get_redirect=True)
		else:
			text = ""
		return text

	def is_redirect(self):
		"""alias for self.isRedirectPage"""
		return self.isRedirectPage()

	def get_redirect_target(self):
		"""alias for self.getRedirectTarget"""
		return Article(self.getRedirectTarget())

	def language(self):
		"""return the language code of the site of the article"""
		return self.site().language()

	def red_links(self, get_links=False):
		"""
		returns red links to get the number of the wanted pages in the article
		if get_links is True, it returns a list of all red links, else it
		returns a two-tuple type, first element is the number of distincts red links,
		second is the number of all redlinks in the article.
		"""
		# gplcontinue doubles as loop flag (True initially) and as the
		# API continuation token (a string) on subsequent iterations.
		gplcontinue = True;	red_links = set(); all_red_links = 0
		params={
			'action'   : 'query',
			'generator': 'links',
			'titles'   : self.title(),
			'prop'     : 'info',
			'gpllimit' : 'max',
		}
		while gplcontinue:
			if isinstance(gplcontinue, basestring):
				params['gplcontinue']=gplcontinue
			data = pywikilib.query.GetData(params)
			gplcontinue = data['query-continue']['links']['gplcontinue'] if data.has_key("query-continue") else None
			ids = data['query']['pages'] if data.has_key("query") else []
			for id in ids:
				if int(id)<0: #negative ids are for missing pages (red links).
					red_links.add(ids[id]['title']) #dont add repeated links
					all_red_links += 1
		if get_links:
			return red_links
		else:
			return len(red_links), all_red_links

	def countable_chars(self):
		"""
		Remove interwiki and comment content, including the corresponding new line character and spaces.
		Then return the value of the length of the page.
		"""
		# Strip HTML comments first, together with trailing spaces/newline.
		clean_content=re.sub("(?s)<!--[^>]+--> *\r?\n?", "", self.fetch())
		cmt_length = len(self.fetch()) - len(clean_content)

		iw_count  = 0
		iw_length = 0
		# Candidate interwiki links; a match is only removed when its
		# prefix is a known language of this site's family (see below).
		linksR = r"(\[\[[a-zA-Z\-]+\s?:[^\[\]\n]+\]\] *\r?\n?)"
		links  = re.findall(linksR, clean_content)
		for iw in links:
			lang = iw.split("[[")[1].split(":")[0]
			if not lang in self.site().family.langs.keys(): continue
			clean_content = clean_content.replace(iw, "")
			iw_length += len(iw)
			iw_count  += 1

		#print len(self.fetch()), cmt_length, iw_length, len(clean_content)
		return len(clean_content)

	def backlinks(self, get_links=False):
		"""return the number of pages that links to a page, or the list of these ones."""
		b_links = []
		params = {
			'action': 'query',
			'list': 'backlinks',
			'bltitle': self.title(),
			'blnamespace': 0,
			'blfilterredir': 'all',
			'bllimit': 'max',
		}
		blcontinue = True
		while blcontinue:
			if isinstance(blcontinue, basestring):
				params['blcontinue']=blcontinue
			try:
				data = pywikilib.query.GetData(params)
				b_links += data['query']['backlinks']
				blcontinue = data['query-continue']['backlinks']['blcontinue'] if data.has_key("query-continue") else False
			except:
				# NOTE(review): bare except swallows every error and retries
				# forever after a 10 s pause — a permanent failure loops here.
				time.sleep(10)
		if get_links:
			return [b_link['title'] for b_link in b_links]
		else:
			return len(b_links)

	def embeddedin(self, get_links=False):
		"""return the number of pages where a page is embedded in, or the list of these ones."""
		e_links = []
		params = {
			'action': 'query',
			'list': 'embeddedin',
			'eititle': self.title(),
			'einamespace': 0,
			'eifilterredir': 'all',
			'eilimit': 'max',
		}
		eicontinue = True
		while eicontinue:
			if isinstance(eicontinue, basestring):
				params['eicontinue']=eicontinue
			data = pywikilib.query.GetData(params)
			e_links += data['query']['embeddedin']
			eicontinue = data['query-continue']['embeddedin']['eicontinue'] if data.has_key("query-continue") else False
		if get_links:
			return [e_link['title'] for e_link in e_links]
		else:
			return len(e_links)

	def get_linking_pages(self):
		"""Returns a pagegenerator of articles linking to the page."""
		refpage = pywikilib.Page(self.site(), self.title())
		allpages = pg.ReferringPageGenerator(refpage, onlyTemplateInclusion=True)
		articles = pg.NamespaceFilterPageGenerator(allpages, 0) # articles (main) namespace only
		return articles #pg.PreloadingGenerator(articles, pageNumber = 50)

	def get_templates(self, get_templates=True):
		"""Return the names (namespace stripped) of the templates used on the
		page, or their count when get_templates is False."""
		tpls = []
		params = {
			'action': 'query',
			'prop': 'templates',
			'titles': self.title(),
			#'tldir': 'all',
			'tlnamespace': 10,
			'tllimit': 'max',
		}
		tlcontinue = True
		while tlcontinue:
			if isinstance(tlcontinue, basestring):
				params['tlcontinue']=tlcontinue
			data = pywikilib.query.GetData(params)
			# Single title queried, so there is exactly one page id.
			pageid = data['query']['pages'].keys()[0]
			tpls += data['query']['pages'][pageid]['templates']
			tlcontinue = data['query-continue']['templates']['tlcontinue'] if data.has_key("query-continue") else False

		if get_templates:
			# Drop the "Plantilla:" namespace prefix.
			return [tpl['title'].split(":",1)[1] for tpl in tpls]
		else:
			return len(tpls)

	def has_template(self, template):
		"""True if the page transcludes the given template."""
		try:
			if template in self.templates():
				return True
		except KeyError:
			return False
		return False

	def has_any_template(self, template_list):
		"""Return the first template of template_list found on the page, else False."""
		for template in template_list:
			if self.has_template(template):
				return template
		return False

	def remove_template(self, template, summary):
		"""Remove {{template}} from the page; saves only when the global
		args.edit flag is set, otherwise just shows the diff."""
		text = self.fetch()
		# First letter is matched case-insensitively (MediaWiki convention).
		re_tpl="[%s%s]%s" %(template[0].upper(),template[0].lower(),template[1:])
		new_text = re.sub(r"\{\{ *%s *\}\}\s+?" % re_tpl, "", text,re.S)
		if args.edit:
			self.put(new_text, summary % template)
		else:
			pywikilib.showDiff(text, new_text)

	def add_template(self, template, summary):
		"""Append {{template}} before the ORDENA/categories/interwiki block at
		the page bottom; saves only when the global args.edit flag is set."""
		pywikilib.output("[[%s]]" % blue(self.title()))
		text = self.fetch()
		categories = re.compile(r"(\[\[ *[Cc]ategor(?:y|ia) *: *[^\]]+?\]\]\s*?)")
		iws = re.compile(r"(\[\[[a-z\-]{2,12}:[^\]]+?\]\]\s*?)")
		ordena = re.compile(r"(\{\{ORDENA:[^}]+?\}\}(?:\s*<!--ORDENA generat per bot-->)?)\s*")
		o = ordena.search(text)
		c = categories.findall(text)
		i = iws.findall(text)
		new_text=text
		# Strip the footer elements, then rebuild them after the new template
		# so the template lands above ORDENA/categories/interwikis.
		new_text=ordena.sub("", new_text)
		new_text=categories.sub("", new_text)
		new_text=iws.sub("", new_text)
		end=["{{%s}}" % template]
		if o:
			end += ["", o.group(1)]
		if c:
			end += c
		if i:
			end += i
		new_text= "%s%s" % (new_text, "\n".join(end))
		if args.edit:
			self.put(new_text, summary % template)
		else:
			pywikilib.showDiff(text, new_text)

class Source(object):
	"""
	This object get the page content of the lists and keep them in a file, also returns data
	about the list, the topics of the list and its articles.
	"""
	def __init__(self):
		# Titles of the source lists: the meta-wiki master list and the
		# local (ca.wikipedia) copies of it.
		self.title = "List of articles every Wikipedia should have"
		self.local_title = u"Viquipèdia:Llista d'articles que totes les llengües haurien de tenir"
		self.local_title2 = u"Viquipèdia:Llista dels 1000 articles fonamentals"
		self.local_title3 = u"Usuari:TronaBot/log:Els 1000/llista temàtica"

	def list_from_local(self):
		"""Parse the local thematic list page and dump it to the 'cawiki' file.

		Returns (sorted_topics, topics, articles) where topics maps each
		topic heading to its list of article titles.
		"""
		#get list from cawiki and save it in a text file.
		site = pywikilib.getSite()
		page = pywikilib.Page(site, self.local_title3)
		topics={}; articles=[]
		content = page.get()
		content = content.split(u"== taules ==")[1]
		sorted_topics = re.findall("== *\[\[(?:[^|]+?\|)(?P<title>.+?)\]\] *==", content)

		for line in content.splitlines():
			# t matches a topic heading, p a table row holding one article.
			t=re.search("(=+) \[\[(?:[^|]+?\|)(?P<title>.+?)\]\] (=+)", line)
			p=re.search("\|\| \[\[(?P<title>[^\|]*?)(?:\|.*)?\]\]", line)
			if t:
				topic=t.group(2).strip()
			if p:
				article=p.group(1).strip()
				articles.append(article)
				if topics.has_key(topic):
					topics[topic].append(article)
				else:
					topics[topic]=[article]
		f = File("cawiki", pref=True)
		f.prepare()
		i=0; j=0
		# NOTE(review): the regex-derived sorted_topics above is discarded
		# here and replaced by this hard-coded ordering.
		sorted_topics = (
			"Biografies", "Filosofia i psicologia", u"Religió", u"Societat",
			"Llengua i literatura", u"Ciència", "Tecnologia", "Cultura",
			u"Història", "Geografia"
		)
		for topic in sorted_topics:
			f.write_line(u"== %s ==\n" % topic)
			for article in topics[topic]:
				i+=1;j+=1
				f.write_line(u"{:>3} / {:>4} [[{}]]<br/>\n".format(i, j, article.strip()))
			i=0
			f.write_line("\n")
		f.close()
		return sorted_topics, topics, articles

	def list_from_meta(self):
		"""Parse the meta-wiki master list (resolving each entry through its
		Wikidata item to the enwiki title) and dump it to the 'meta' file.

		Returns (sorted_topics, topics, articles).
		"""
		#get list from metawiki and save it in a text file.
		site = pywikilib.getSite("meta","meta")
		repo = pywikilib.getSite('wikidata', 'wikidata')
		page = pywikilib.Page(site, self.title)
		content = page.get()
		content = content.split("== How to use this list ==")[1].split("[[Category:")[0]
		sorted_topics = re.findall("[^=](?:== +?)(?P<topic>.*?)(?: +?==)", content)
		topics={}; articles=[]
		for line in content.splitlines():
			t=re.search("(?P<bgn>=+ *?)(?P<topic>.*?)(?P<end> *?=+)", line)
			if t:
				if t.group("topic").strip() not in sorted_topics: continue
				topic=t.group("topic").strip()
			p=re.search("\[\[d: *(?P<item>Q\d+) *\|(?P<title>[^\]]+?)\]\]", line)
			if p:
				if not topics.has_key(topic):
					topics[topic]=[]
				item = p.group("item").strip()
				data = pywikilib.DataPage(repo, item)
				# NOTE(review): this rebinds the accumulating `articles`
				# list to the item's sitelinks, so earlier entries are
				# lost each iteration — the returned `articles` is not
				# the full list of titles. TODO confirm intent.
				articles = data.interwiki()
				for iw in articles:
					if iw.site().lang == 'en':
						pywikilib.output(iw.title(asLink=True,textlink=True))
						#pywikilib.output(iw.title())
						article=iw.title()
						topics[topic].append(article)
						articles.append(article)
						break
		ft=File("meta", pref=True)
		i=0; j=0
		ft.prepare()
		for topic in sorted_topics:
			ft.write_line(u"== %s ==\n" % topic)
			for article in topics[topic]:
				i+=1;j+=1
				ft.write_line(u"{:>3} / {:>4} [[:en:{}]]<br/>\n".format(i, j, article.strip()))
			i=0
			ft.write("\n")
		ft.close()
		return sorted_topics, topics, articles

	def list_from_file(self, suffix):
		"""returns sorted_topics, topic, articles"""
		#get list from file
		content = File(suffix, pref=True).read()

		sorted_topics = re.findall("== (.+?) ==", content)
		topics={}; articles=[]
		for line in content.splitlines():
			t=re.search("(?P<bgn>=+ *?)(?P<topic>.*?)(?P<end> *?=+)", line)
			p=re.search("\[\[(?::en:)??(?P<title>[^\]]+?)\]\]", line)
			if t:
				topic=t.group("topic").strip()
			if p:
				article=p.group("title")
				articles.append(article)
				if topics.has_key(topic):
					topics[topic].append(article)
				else:
					topics[topic]=[article]
		return sorted_topics, topics, articles

	def check_lists(self):
		"""Compare the meta list against the local one and write a report
		(articles to add/remove/fix, entries with no ca interwiki) to the
		'check' file."""
		#compare lists from local to English.
		sorted_topics, topics, art_list = self.list_from_meta()
		local_sorted_topics, local_topics, local_art_list = self.list_from_local()
		pywikilib.output("tenim %i articles. meta: %i" % (len(local_art_list), len(art_list)))
		enwiki=pywikilib.getSite("en")
		i=0; must_be_added=[]; must_be_fixed=[]
		# Start assuming every local article must go; entries found on the
		# meta list are removed from this set below.
		must_be_removed = list(local_art_list)
		without_iw = []
		lines=[]
		for topic in sorted_topics:
			line= u"== %s ==" % topic
			lines.append(line)
			pywikilib.output(line)
			for title in topics[topic]:
				i+=1
				pywikilib.output(u"%i [[:en:%s]]" % (i, title))
				art = Article(title, enwiki)
				line = u"# [[:en:%s]]" % (art.title())
				if art.get_interwiki("ca"):
					local_art = art.get_interwiki("ca")
					if local_art.is_redirect():
						pywikilib.output(u"\t%s és una redirecció!" % local_art.title())
						line += u' → <span style="background-color:yellow;">[[%s]]</span>' % local_art.title()
						must_be_fixed.append(local_art.title())
					elif local_art.title() not in local_art_list:
						pywikilib.output(u"\t%s no és a la llista!" % local_art.title())
						line += u' → <span style="background-color:orange;">[[%s]]</span>' % local_art.title()
						must_be_added.append(local_art.title())
					else:
						line += u" → [[%s]]" % local_art.title()
						must_be_removed.remove(local_art.title())
				else:
					line += u' → <span style="background-color:red;">sense iw!!!</span>'
					print "\tsense iw!!!"
					without_iw.append(title)
				lines.append(line)
			lines.append("")
		# Prepend the four summary lines in a fixed order.
		if must_be_removed:
			line = 	"Cal treure %i articles: [[%s]]\n"  % (
				len(must_be_removed),
				"]], [[".join(must_be_removed)
			)
			lines.insert(0, line)
			pywikilib.output(line)
		if must_be_added:
			line = "Cal afegir %i articles: [[%s]]\n" % (
				len(must_be_added),"]], [[".join(must_be_added)
			)
			lines.insert(1, line)
			pywikilib.output(line)
		if must_be_fixed:
			line = "Cal rectificar %i articles: [[%s]]\n" % (
				len(must_be_fixed),"]], [[".join(must_be_fixed)
			)
			lines.insert(2, line)
			pywikilib.output(line)
		if without_iw:
			line = "Cal cercar %i articles: [[:en:%s]]\n" % (
				len(without_iw),"]], [[:en:".join(without_iw)
			)
			lines.insert(3, line)
			pywikilib.output(line)
		if not must_be_removed and not must_be_added and not without_iw:lines.append("")
		ft=File("check", pref=True)
		ft.save("\n".join(lines))

class Counter(object):
	"""
	Attribute-based counter: any attribute that was never assigned reads
	as 0, so callers can write ``counter.foo += 1`` without initialising
	``foo`` first.  Only __getattr__ is needed; regular assignment stores
	values in the instance dict as usual.
	"""
	def __getattr__(self, attr):
		# __getattr__ is only invoked after normal attribute lookup has
		# already failed, so the attribute is known to be missing here.
		# (The previous implementation called hasattr(self, attr), which
		# re-entered __getattr__ with the same name and recursed to the
		# interpreter limit on every first access.)
		if attr.startswith("__") and attr.endswith("__"):
			# Don't fabricate dunder attributes: copy/pickle/inspect
			# probe for them and rely on AttributeError being raised.
			raise AttributeError(attr)
		setattr(self, attr, 0)
		return 0

class Project(object):
	def __init__(self):
		"""
		This object represents the 1000 artcile project.
		It builds the wikitable.
		"""
		self.site = pywikilib.getSite("ca","wikipedia")
		self.log_title = u"Usuari:%s/log:Els 1000/llista" % logged_in_as['wikipedia']['ca']
		# Running tallies, bumped by art_length_counter()/feature_cell().
		self._summary = Counter()

		self._summary.absent = 0
		# NOTE(review): initialised as `stub2` but art_length_counter()
		# increments `stubs2` — the Counter default-0 hides the mismatch.
		self._summary.stubs = 0
		self._summary.stub2 = 0
		self._summary.articles = 0
		self._summary.articles2 = 0
		self._summary.long_articles = 0
		self._summary.not_eval_FA = 0
		self._summary.FA_candidates = 0
		self._summary.good_articles = 0
		self._summary.feature_articles = 0

		#Categoria:Plantilles d'articles fonamentals
		self.feature_templates = (
			"Proposta AdQ", "PropostaAdQ", "Article bo", "1000+AdQ", "Article de qualitat"
		)
		# NOTE(review): parentheses without a trailing comma — this is a
		# plain string, not a 1-tuple. TODO confirm intent.
		self.feature_removed =("RetiradaAdQ") #on talk
		self.proposed_articles = ("Proposta AdQ", "PropostaAdQ") #feature_templates[:2]
		self.good_articles = ("Article bo",) #feature_templates[2]
		self.feature_articles = ("1000+AdQ", "Article de qualitat") #feature_templates[3:]
		self.W100_templates = ("100","100+AB","100+AdQ","100 text","100+1000","100+1000+AB","100+1000+AdQ")
		self.W1000_templates = (
			"100+1000", "100+1000+AB", "100+1000+AdQ", "1000", "1000+AB", "1000+AdQ",
			"1000 Cultura i oci", "1000 Biografies", u"1000 Ciència", "1000 Filosofia i psicologia",
			u"1000 Història i geografia", "1000 Llengua i literatura", u"1000 Religió", "1000 Societat",
			"1000 Tecnologia"
		)
		# Scale factor from local char count to the "meta index". #.971 3-05-13
		self.avg_size=1.1

	def art_length_counter(self, i):
		"""Bucket an article's char count i into the summary tallies."""
		#absent
		if i == 0:
			self._summary.absent += 1
		#stubs 1. Too short!
		elif i < 4000 and i >= 1:
			self._summary.stubs += 1
		#stubs 2. It can reach 10000!
		elif i < 10000 and i >= 4000:
			self._summary.stubs2 += 1
		#article 1. Nice weight!
		elif i < 20000 and i >= 10000:
			self._summary.articles += 1
		#article 2. It can reach 30000!
		elif i < 30000 and i >= 20000:
			self._summary.articles2 += 1
		#long article 2
		elif i >= 30000:
			self._summary.long_articles += 1

	def feature_cell(self, template):
		"""Return the wikitable quality-cell markup for the given template
		name, bumping the matching summary tally."""
		#file of F/B-A templates
		if template in self.feature_templates:
			if template in self.proposed_articles:
				s = "Nuvola filesystems folder.png|link=VP:PAdQ|20px"
				self._summary.FA_candidates += 1
			elif template in self.good_articles:
				s = "Nuvola apps kontact.png|link=VP:AB|20px"
				self._summary.good_articles += 1
			elif template in self.feature_articles:
				s = "Distintiu de qualitat.png|link=VP:AdQ|15px"
				self._summary.feature_articles += 1
			if s:
				return 'align="center" | [[Fitxer:%s]]' % s
		else:
			self._summary.not_eval_FA += 1
			return u'align="center" | —'

	def delta(self, n, f="i"):
		"""Format a signed difference for the edit summary; f='f' for floats."""
		#summary diff
		if n==0:
			return "[0]"
		if f == "f":
			return "[%+.2f]" % n
		else:
			return "[%+i]" % n

	def cell_colour(self, s):
		"""Return the style attribute colouring a size cell by bucket.

		NOTE(review): uses `s > 30000` (strict) while art_length_counter
		uses >=, so a size of exactly 30000 gets no colour — confirm.
		"""
		#colour of the size cell
		c=""
		if s < 4000 and s >= 1:
			c="red;"
		elif s < 10000 and s >= 4000:
			c="#FFA500;"
		elif s < 20000 and s >= 10000:
			c="yellow;"
		elif s < 30000 and s >= 20000:
			c="#ADFF2F;"
		elif s > 30000:
			c="#00FF00;"

		if c: c = ' align="right" style="background-color:%s"|' % c
		else: c +=  ' align="right"|'

		return c

	def get_last_update(self):
		"""Return (timestamp, comment) of the bot's last 'per fer:' edit on
		the general log page, scanning at most the last 10 revisions."""
		#get data about last update that appears in summary in the history.
		page = pywikilib.Page(self.site, "%s general" % (self.log_title,))
		# Layout of each revision tuple returned by getVersionHistory:
		"0:id, 1:timestamp, 2:user, 3:comment, 4:size, 5:tags"
		history = page.getVersionHistory(revCount=10)
		ts=None; cmt=None
		for rev in history:
			if rev[2] == logged_in_as['wikipedia']['ca'] and rev[3].startswith("per fer: "):
				ts=rev[1]
				cmt=rev[3]
				break
		return ts, cmt

	def update_tables(self, from_file=True):
		"""Rebuild the general and per-topic wikitables and save/log them.

		Relies on the module-level globals `source`, `args` and `chrono`.
		"""
		#update the tables.
		if from_file:
			sorted_topics, topics, articles = source.list_from_file("cawiki")
		else:
			sorted_topics, topics, articles = source.list_from_local()
		indexes = {}
		wanted_pages = 0
		grand_size = 0
		for title in articles:
			article = Article(title)
			length = article.countable_chars()
			self.art_length_counter(length)
			meta_idx = length*self.avg_size
			try:
				template = [tpl for tpl in self.feature_templates if tpl in article.templates()]
				template = template and template[0] or ""
				has_100_too = bool([tpl for tpl in self.W100_templates if tpl in article.templates()])
			except:
				# Best effort: treat API/parse failures as "no templates".
				template=""; has_100_too=False
			red_links=article.red_links()[0]
			wanted_pages += red_links
			grand_size += length
			if not indexes.has_key(meta_idx):
				indexes[meta_idx]=[]
			indexes[meta_idx].append(
				{
					"title": article.title(),
					"style": self.cell_colour(meta_idx),
					"quality": self.feature_cell(template),
					"100": has_100_too,
					"redlinks": red_links,
					"length": length,
					"needed": float((30000-length)) #/self.avg_size 03-06-13
				}
			)
			# Progress output every 100 articles.
			# NOTE(review): index() returns the FIRST occurrence, so a
			# duplicate title reports a wrong position — confirm harmless.
			i = articles.index(title)+1
			if i % 100 == 0 or i == 1:
				pywikilib.output(
					u"{:>4} {} {} {} {} {} {}".format(
						i,
						title,
						length,
						meta_idx,
						template,
						has_100_too,
						red_links
					)
				)

		summary = self._summary

		# ------------------------------------------------------
		#- Create a wikitable with the data from registre.
		wpl = u"[[:Usuari:%s/log:Els 1000/pàgines demanades|enllaços<br/>vermells]]" % logged_in_as['wikipedia']['ca'] #wanted articles link
		i=0
		table=u'{| class="sortable wikitable"\n! id !! article !! índex<br/>meta !! qualitat !! %s !! calen' % wpl
		# Rows ordered by descending meta index.
		for idx in reversed(sorted(indexes.keys())):
			for page in indexes[idx]:
				i+=1
				tmpl = {
					"i"       : i,
					"title"   : page['title'],
					"idx"     : fmt(idx, 0, thsep=" "),
					"style"   : page['style'],
					"quality" : page["quality"],
					"redLk"   : page["redlinks"],
					"needed"  : 'align="right"| %s' % fmt(page["needed"], 0, thsep=" ") if page["length"] < 30000 else "",
				}
				table += u'\n|-\n| %(i)i || [[%(title)s]] ||%(style)s %(idx)s || %(quality)s || align="right" | %(redLk)i ||%(needed)s' % tmpl
		table+=u"\n|}\n"

		# ------------------------------------------------------
		# - Create same table but sliced by topics.
		tables=""
		for topic in sorted_topics:
			catlink= u"[[Viquipèdia:Llista d'articles que totes les llengües haurien de tenir/Per millorar/%(cat)s|%(cat)s]]" %{"cat":topic}
			tables += u"\n=== %s ===" % catlink
			tables +=u'\n{| class="sortable wikitable"\n! id !! article !! índex<br/>meta || qualitat !! %s !! calen' % wpl
			i=0
			for idx in reversed(sorted(indexes.keys())):
				for page in indexes[idx]:
					if page['title'] in topics[topic]:
						i+=1
						tmpl={
							"i"       : i,
							"title"   : page['title'],
							"idx"     : fmt(idx, 0, thsep=" "),
							"style"   : page['style'],
							"quality" : page["quality"],
							"redLk"   : page["redlinks"],
							"needed"  : 'align="right"| %s' % fmt(page["needed"], 0, thsep=" ") if page["length"] < 30000 else "",
						}
						tables += u'\n|-\n| %(i)i || [[%(title)s]] ||%(style)s %(idx)s || %(quality)s || align="right" | %(redLk)i ||%(needed)s' % tmpl
			tables += "\n|}\n"

		# ------------------------------------------------------
		# - Fetch old edit summary and create new summary table.
		"""
		Let's calculate scores

		rawscore = stubs + articles*4 + long_articles*9
		maxscore = (absent + stubs + articles + long_articles)*9
		score = rawscore / maxscore * 100
		"""
		absent= summary.absent
		stubs = summary.stubs + summary.stubs2
		articles = summary.articles + summary.articles2
		long_articles = summary.long_articles

		rawscore = stubs + (articles * 4) + (long_articles * 9)
		maxscore = (absent + stubs + articles + long_articles) * 9
		score = "%.2f" %(100.0 * rawscore / maxscore)

		# Previous run's figures, recovered from the last edit summary.
		last_absent, last_stubs, last_arts, last_long, last_score, last_grand_size = 0, 0, 0, 0, 0, 0
		score_RE=re.compile(
			ur"per fer: (?P<abs>\d+) \[[+-]?\d+\]; "
			"esb\.: (?P<stubs>\d+) \[[+-]?\d+\]; "
			"art\.: (?P<arts>\d+) \[[+-]?\d+\]; "
			"art\. llargs: (?P<long>\d+) \[[+-]?\d+\]; "
			"resultat: (?P<score>[\d\.]+) \[[+-]?[\.\d]+\]"
			ur" \| total caràcters: (?P<size>[\d ]+) \[[+-]?[\d ]+\]")
		timestamp, comment = self.get_last_update()
		m=re.match(score_RE, comment)
		if m:
			data=m.groupdict()
			last_absent= int(data['abs'])
			last_stubs=int(data['stubs'])
			last_arts=int(data['arts'])
			last_long=int(data['long'])
			last_score=float(data['score'])
			last_grand_size=int(data['size'].replace(" ",""))
		(
			abs_diff,
			stb_diff,
			art_diff,
			lng_diff,
			scr_diff,
			sz_diff
		) = (
			absent-last_absent,
			stubs-last_stubs,
			articles-last_arts,
			long_articles-last_long,
			float(score)-last_score,
			grand_size-last_grand_size
		)
		# New edit summary; must keep the exact format score_RE parses.
		comment =  u"per fer: %s %s; esb.: %s %s; art.: %s %s; art. llargs: %s %s; resultat: %s %s | total caràcters: %s %s" % (
			absent, self.delta(abs_diff),
			stubs, self.delta(stb_diff),
			articles, self.delta(art_diff),
			long_articles, self.delta(lng_diff),
			score, self.delta(scr_diff,"f"),
			fmt(grand_size, 0, thsep=" "), self.delta(sz_diff)
		)

		tpl = {
			"stub": stubs, "art": articles, "long": long_articles,
			"raw": rawscore, "max": maxscore, "sco": score,
		}
		summary_text  = '\n\r{| class="wikitable"\n'
		summary_text += u"! esborranys !! articles !! articles llargs !! resultat brut !! resultat màxim !! resultat final"
		summary_text += "\n|-\n| %(stub)i || %(art)i || %(long)i || %(raw)i || %(max)i || %(sco)s\n|}" % tpl
		summary_text += '\n{| class="wikitable"\n'
		summary_text += u'! no avaluats !! propostes !! art. bons !! AdQ !! <span title="en caràcters (sense comentaris ni iw)">mida total<br/>articles</span>'
		summary_text += "\n|-\n| %i || %i || %i || %i || %s\n|}\n<!--total articles demanats: %i-->\n" % (
			summary.not_eval_FA, summary.FA_candidates, summary.good_articles, summary.feature_articles, fmt(grand_size, 0, thsep=" "), wanted_pages
		)

		# ------------------------------------------------------
		# - Now, log and edit.
		header = u"== taules ==\nDarrera actualització: ~~~~~.\n"
		running_time = chrono.running_time()
		footer = "<!--realitzat en: %sh %sm %ss-->\n" % (chrono.hours, chrono.minutes, chrono.seconds)

		themes=[
			("general", table),
			(u"temàtica", tables)
		]
		for theme in themes:
			# args.skip may name themes to leave untouched.
			if isinstance(args.skip, basestring) and theme[0] in args.skip: continue
			f = File(u"(%s)" % (theme[0].replace(u"à","a")), pref=True, sep="_")
			new_text = u"%s%s" % (theme[1] , summary_text)
			f.save(new_text)

			page = pywikilib.Page(self.site, u"%s %s" %(self.log_title, theme[0]))
			text = page.get() if page.exists() else ""
			old_text = text.split("== taules ==")[0]
			new_text = u"%s%s%s%s%s" % (old_text, header, summary_text, new_text, footer)
			if args.edit and old_text != new_text:
				page.put(new_text, comment)
			else:
				pywikilib.output(comment)

	def get_wanted_pages(self):
		"""Record the number of red links per article and the backlink count
		of every wanted page; results are backed up to the 'redlinks' and
		'wantedpages' files."""

		wanted_pages_dict={}; red_links_dict={}
		sorted_topics, topics, titles = source.list_from_file("cawiki")
		i=0
		for title in titles:
			i+=1
			pywikilib.output("%i.- [[%s]]" %(i, title), newline=False)
			art = Article(title)

			# Group article titles by their red-link count.
			red_links = art.red_links(get_links=True)
			len_rl=len(red_links)
			if not red_links_dict.has_key(len_rl):
				red_links_dict[len_rl]=[]
			red_links_dict[len_rl].append(title)

			for page_title in red_links:
				if not wanted_pages_dict.has_key(page_title):
					# NOTE(review): pages2 is never initialised before this
					# increment — first new red link raises NameError.
					pages2+=1
					wanted_pages_dict[page_title]=Article(page_title).backlinks()
				else:
					pywikilib.output("\tbl: %i" % wanted_pages_dict.get(page_title), newline=False) #already scanned
			pywikilib.output("\twp: %i" % pages2)
		File("redlinks", pref=True).backup(red_links_dict)
		File("wantedpages", pref=True).backup(wanted_pages_dict)

	def wanted_pages(self):
		"""Publish the wanted-pages report (sorted by request count) to the
		wiki log page and refresh the redlinks log file."""
		wanted_pages_dict = File("wantedpages", pref=True).load()
		red_links_dict = File("redlinks", pref=True).load()
		# Keys come back as strings from the backup file; re-key as ints.
		for k in red_links_dict.copy():
			red_links_dict[int(k)] = red_links_dict.pop(k)
		# Invert title->count into count->[titles].
		reversed_wpages = {}
		for page in wanted_pages_dict:
			links_num=wanted_pages_dict[page]
			if not reversed_wpages.has_key(links_num):
				reversed_wpages[links_num]=[]
			reversed_wpages[links_num].append(page)
		File("rvs_wpages", pref=True).backup(reversed_wpages)

		# Save the log of requested articles, sorted by number of requests,
		# to a file and to a project page.
		pywikilib.output(u"\n\n\n=== %s pàgines demanades ===" % yellow("comencem"))
		f=File("wantedpages", pref=True, timestamp=True)
		f.prepare()
		for n in reversed(sorted(reversed_wpages.keys())):
			f.write_line(u"== %i  ==" % n)
			i=0
			for page in sort_list(reversed_wpages[n]):
				line=u"# [[:%s]]\n" % page
				i+=1
				# Repeat a sub-heading every 25 rows inside long sections.
				if len(reversed_wpages[n]) > 50 and i % 25 == 0 and len(reversed_wpages[n]) - i > 25:
					line += u"#;%i pàgines demanades" % n
				f.write_line(line)
			f.write_line()
			f.flush()

		stats = "scanned pages: %i. wantedpages: %i" % (1000, len(wanted_pages_dict))
		timestamps = chrono.stop()
		f.write_line("\n\n<!--%s\n%s-->" % (stats,  timestamps))
		pywikilib.output(u"%s\n%s" % (stats,timestamps))
		fname = f._filename
		f.close()

		# Only the sections above "4 links" are published on-wiki.
		text=File(fname, pref=True).read()
		text = text.split(u"== 4 enllaços ==")[0]
		print len(text)
		page = pywikilib.Page(self.site, u"User:%s/log:Els 1000/pàgines demanades" % logged_in_as['wikipedia']['ca'])
		if args.edit:
			page.put(text, u"Actualitzant dades, %i pàgines demanades als 1000." % len(wanted_pages_dict))
		else:
			pywikilib.output(u"Actualitzant dades, %i pàgines demanades als 1000." % len(wanted_pages_dict))

		# Save the log of red-link counts for each of the 1000 articles to a
		# text file.
		f=File("redlinks", pref=True, timestamp=True)
		f.prepare()
		total=0
		for n in reversed(sorted(red_links_dict.keys())):
			i=int(n)
			f.write_line(u"== %i enllaços rojos ==" % i)
			total += i * len(red_links_dict[n])
			for page in sorted(red_links_dict[n]):
				line=u"# [[:%s]]" % page
				f.write_line(line)
			f.write_line()
		f.write_line("\n\n<!--%s\n%s\ntotal: %i\n-->" % (stats,  chrono.stop(), total))
		f.close()

	def check_templates(self):
		"""Cross-check template transclusions against the list: articles that
		carry a 1000-template but are off the list go to 'remove', listed
		articles missing every 1000-template go to 'add'; both are backed up
		to the 'templates' file."""
		sorted_topics, topics, titles = source.list_from_file("cawiki")
		remove = []
		print "REMOVE"
		for template in self.W1000_templates:
			template = Article("Plantilla:%s" % template)
			for article in template.embeddedin(get_links=True):
				art = Article(article)
				if not art.title() in titles:
					remove.append(art.title())
		print "remove", len(remove)
		print "ADD"
		add = []
		for title in titles:
			art = Article(title)
			if not art.has_any_template(self.W1000_templates):
				pywikilib.output("\t%s" %art.title(asLink=True))
				add.append(art.title())
		print "add", len(add)
		data = {
			"remove": remove,
			"add": add
		}
		f = File("templates", pref=True)
		f.backup(data)
		#f.open()

	def fix_templates(self):
		"""Apply the 'templates' backup produced by check_templates(): strip
		project templates from de-listed articles and add the per-topic
		template to newly listed ones."""
		display = pywikilib.output
		removable = (
			"1000", "1000 Cultura i oci", "1000 Biografies", u"1000 Ciència", u"1000 Ciències naturals",
			"1000 Filosofia i psicologia", u"1000 Història i geografia", "1000 Geografia",
			"1000 Llengua i literatura", "1000 Llenguatge", u"1000 Religió", "1000 Societat",
			"1000 Tecnologia"
		)
		# Combined templates that must be downgraded (not just removed)
		# when an article leaves the 1000 list.
		stilling = {
			"100+1000": "100",
			"100+1000+AB": "100+AB",
			"100+1000+AdQ": "100+AdQ",
			"1000+AB": "Article bo",
			"1000+AdQ": "Article de qualitat",
		}
		templates = File("templates", pref=True).load()
		sorted_topics, topics, articles = source.list_from_file("cawiki")
		# Re-map article title -> topic for the add loop below.
		articles={}
		for topic in topics:
			for article in topics[topic]:
				articles[article]=topic
		for title in templates['remove']:
			art = Article(title)
			rm_template = art.has_any_template(removable)
			if rm_template:
				display("%s removing {{%s}}" %(art.title(asLink=True), rm_template))
				art.remove_template(rm_template, "Bot: s'ha eliminat la plantilla {{%s}}, l'article ja no forma part del projecte")
			rm_template = art.has_any_template(stilling)
			if rm_template:
				# NOTE(review): the downgrade replacement is only reported,
				# never performed — TODO confirm this is intentional.
				display("%s removing {{%s}} -1000" %(art.title(asLink=True), rm_template))
		for title in templates['add']:
			art = Article(title)
			if art.is_redirect():
				art = art.get_redirect_target()
			art.add_template("1000 %s" % articles[title], u"Bot: afegint plantilla {{%s}}, s'ha inclós l'article al [[Viquiprojecte:Els 1.000|projecte]].")

	def remove_templates(self):
		"""Merge separate project/quality templates into the combined ones.

		Original (Catalan) request this implements, kept verbatim:

		Els articles que siguin més d'una varietat i que portin plantilles separades,
		siguin substituïdes per les noves plantilles coordinades (s'alineen millor les icones). Són:

			{{100}} + {{article de qualitat}} = {{100+AdQ}}
			{{100}} + {{article bo}} = {{100+AB}}
			{{1000}} + {{article de qualitat}} = {{1000+AdQ}}
			{{1000}} + {{article bo}} = {{1000+AB}}
			{{100}} + {{1000}} = {{100+1000}}
			{{100}} + {{1000}} + {{article de qualitat}} = {{100+1000+AdQ}}
			{{100}} + {{1000}} + {{article bo}} = {{100+1000+AB}}

		Atenció!

		Vigileu programar els bots de manera que canviin també els paràmetres dels
		articles dels 1000, ja que aquests es divideixen en àrees, per exemple:

			{{1000 Filosofia}} + {{article de qualitat}} = {{1000+AdQ|Filosofia}}
			{{1000 Tecnologia}} + {{article bo}} = {{1000+AB|Tecnologia}}
			{{100}} + {{1000 Ciències naturals}} = {{100+1000|Ciències naturals}}
			{{100}} + {{1000 Religió}} + {{article de qualitat}} = {{100+1000+AdQ|Religió}}
			{{100}} + {{1000 Biografies}} + {{article bo}} = {{100+1000+AB|Biografies}}

		Gràcies per tot, espero que un bot ho pugui dur a terme. --Xavier D. (A disposar!) 17:30, 12 des 2009 (CET)
		"""
		sorted_topics, topics, articles = source.list_from_file("cawiki")
		templ = lambda x: re.compile(ur"\{\{ *%s *\}\}\s+" % x)
		for art in articles:
			article = Article(art)
			content = article.fetch()
			templates = article.get_templates()
			# NOTE(review): `is_100` is assigned twice (the second was
			# probably meant to be `is_1000`); `topic` is extracted but
			# never used and `is_1000` is never set, so "1000" never makes
			# it into new_tpl. Also add_template() requires a summary
			# argument that is not passed here — this method would raise
			# TypeError. TODO confirm before running.
			is_100="";is_100="";is_FA="";is_GA="";is_1000=""
			for tpl in templates:
				if "1000" in tpl:
					topic=tpl.split()[1]
				if tpl=="100":is_100="100+"
				if tpl=="Article de qualitat":is_FA="+AdQ"
				if tpl=="Article bo":is_GA="+AB"
			new_tpl="{dels100}{dels1000}{AB}{AdQ}".format(dels100=is_100, dels1000=is_1000, AB=is_GA, AdQ=is_FA)
			if is_100:
				content = templ("100").sub("", content)
			if is_GA:
				content = templ("[Ar]rticle bo").sub("", content)
			if is_FA:
				content = templ("[Ar]rticle de qualitat").sub("", content)
			article.add_template(new_tpl)

def test():
	display = pywikilib.output
	i=0;j=0
	for title in project.W1000_templates:
		template = Article(u"Plantilla:%s" % title)
		for a in template.embeddedin(get_links=True):
			a=Article(a)
			i+=1;j+=1
		display("%s %i" %(template.title(asLink=True),j))
		j=0
	print i

def show_help():
	"""Print the command-line usage examples for this script."""
	# Usage text is user-facing and kept verbatim (Catalan).
	usage = u"""
	* actualització diària:
	  python2.7 llista_mils.py -update -frompage -edit

	* actualització diària:
	  python2.7 llista_mils.py -update -edit

	* actualització setmanal (inclou revisió d'articles entre la llista de meta i la local)
	  python2.7 llista_mils.py -check -update

	* actualització setmanal amb canvi dels articles diferents (no implementat encara)
	  python2.7 llista_mils.py -check -update -modify
	"""
	pywikilib.output(usage)

if __name__ == "__main__":
	# Entry point: parse CLI flags, time the run, then dispatch on the
	# (mostly mutually exclusive) flags below.
	args = ArgumentHandler()
	args.parse_arguments()
	chrono = Chrono()
	pywikilib.output(u"[{time}] Beginning...".format(time=yellow(chrono.start_time.strftime("%H:%M:%S"))))
	try:
		source = Source()
		project = Project()
		# Before 05:00 local time, force reading from the wiki page
		# instead of the cached source (daily-run convenience).
		if not args.from_page and int(time.strftime("%H"))<5:args.from_page=True

		#if args.help:
		#	show_help()
		if args.test:
			test()
		elif args.update and args.check:
			source.check_lists()
			project.update_tables()
		elif args.update and not args.check:
			# Update the list only.
			project.update_tables(not args.from_page)
		elif args.check and not args.update:
			# Refresh the log by comparing the meta list against the local one.
			source.check_lists()
		elif args.wantedpages:
			# Update the count of red links (wanted pages).
			project.wanted_pages()
		elif args.chtpl:
			# Verify where the local templates are placed.
			project.check_templates()
		elif args.fixtpl:
			# Remove and re-add the local templates.
			project.fix_templates()
		elif args.rmtpl:
			project.remove_templates()
		elif args.meta:
			# Refresh the data file built from meta.
			sorted_topics, topics, articles = source.list_from_meta()
			print sorted_topics
		elif args.cawiki:
			# Refresh the local data files.
			sorted_topics, topics, articles = source.list_from_local()
			print sorted_topics
	except KeyboardInterrupt:
		# Allow a manual Ctrl-C to fall through to the normal
		# shutdown (timer report + pywikilib.stopme()).
		pass
	pywikilib.output(chrono.stop())
	pywikilib.stopme()