#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Viquipèdia:Viquitrobada 2009/Taller de bots/traductor gencat
# Aparença
#[[w:ca:Viquipèdia:Viquitrobada 2009/Taller de bots/traductor gencat]]
import codecs, httplib, json, re, time, urllib, urllib2
from time import strftime as date
import wikipedia as pywikibot
# HTTP request settings. get() sends the first four as request headers;
# post() sends only the user agent. `encoding` is not referenced by the
# code visible in this file.
user_agent = u'Mozilla/5.0 (cawiki autotranslator 1.0)'
cache_control = pragma = u'no-cache'
accept_charset = encoding = 'utf-8'
# Translation directions.
# Each key is a value accepted for the "languages" command-line option.
# "dir" is the direction identifier sent to the translator and "code" is
# the language subdomain of the Wikipedia the page is read from.

# Directions looked up by get().
oc_dirs = {
    "ca-oc": {"dir": "ca-oc", "code": "ca"},
    "ca-aran": {"dir": "ca-oc_aran", "code": "ca"},
    "es-oc": {"dir": "es-oc", "code": "es"},
    "es-aran": {"dir": "es-oc_aran", "code": "es"},
    "oc-ca": {"dir": "oc-ca", "code": "oc"},
    # This entry used to be keyed "aran-es" as well, so the later
    # "aran-es" entry silently replaced it and this direction could
    # never be selected.
    "aran-ca": {"dir": "oc_aran-ca", "code": "oc"},
    "oc-es": {"dir": "oc-es", "code": "oc"},
    # NOTE(review): the sibling entries write the Aranese side as
    # "oc_aran", which suggests "oc_aran-es" here -- confirm against the
    # translator service before changing the value.
    "aran-es": {"dir": "oc-es_aran", "code": "oc"},
}

# Directions looked up by post().
not_oc_dirs = {
    "en-ca": {"dir": "ENGLISH-CATALAN", "code": "en"},
    "fr-ca": {"dir": "FRENCH-CATALAN", "code": "fr"},
    "de-ca": {"dir": "GERMAN-CATALAN", "code": "de"},
    "es-ca": {"dir": "SPANISH-CATALAN", "code": "es"},
}
def get(title, dir, path, action=""):
    """Fetch a Wikipedia page through the gencat translator with a GET request.

    Arguments:
    title  -- page title, as unicode or as a UTF-8 encoded byte string.
    dir    -- translation direction; must be a key of oc_dirs.
    path   -- URL part between the wiki host and the title, e.g. "wiki/".
    action -- optional text appended right after the title (a query string).

    Returns the response body as read from the connection, or "" when the
    user interrupts the request or five attempts in a row have failed.
    Raises KeyError when dir is not a key of oc_dirs.
    """
    dirs = oc_dirs[dir]
    # Python 2's urllib.quote() fails with KeyError on unicode strings that
    # contain non-ASCII characters, so quote the UTF-8 bytes instead.
    if isinstance(title, unicode):
        title = title.encode("utf-8")
    url_title = urllib.quote(title)
    target_url = "http://%(code)s.wikipedia.org/%(path)s%(title)s%(axn)s" % {
        "code": dirs["code"],
        "title": url_title,
        "axn": action,
        "path": path,
    }
    # The target URL is itself a query-string value: escape it so that any
    # "&" or "?" it contains is not read as part of the translator's own
    # parameters. post() already passes its URL through urlencode().
    query = urllib.urlencode([
        ("dir", dirs["dir"]),
        ("marca", "false"),
        ("url", target_url),
    ])
    url = "http://traductor.gencat.cat/jsp/content.jsp?" + query
    print(url)
    data = None
    retry = 0
    # data stays None until a read succeeds or the function gives up.
    while data is None:
        try:
            request = urllib2.Request(url)
            request.add_header('User-Agent', user_agent)
            request.add_header('Cache-Control', cache_control)
            request.add_header('Pragma', pragma)
            request.add_header('Accept-Charset', accept_charset)
            response = urllib2.urlopen(request)
            try:
                data = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
        except KeyboardInterrupt:
            print(u"l'usuari ha cancel·lat")
            data = ""
        except Exception as e:
            print("%s %s" % (e, retry))
            retry += 1
            if retry >= 5:
                data = ""
    return data
def post(title, dir):
    """Request the translation of a Wikipedia edit page with a POST request.

    Arguments:
    title -- page title, as unicode or as a UTF-8 encoded byte string.
    dir   -- translation direction; must be a key of not_oc_dirs.

    Returns the response body as read from the connection, or "" when the
    user interrupts the request or five attempts in a row have failed.
    Raises KeyError when dir is not a key of not_oc_dirs.
    """
    dirs = not_oc_dirs[dir]
    # Python 2's urllib.quote() fails with KeyError on unicode strings that
    # contain non-ASCII characters, so quote the UTF-8 bytes instead.
    if isinstance(title, unicode):
        title = title.encode("utf-8")
    url = u"http://%s.wikipedia.org/wiki/%s?action=edit" % (dirs['code'], urllib.quote(title))
    params = urllib.urlencode({
        "translationDirection": dirs['dir'],
        "subjectArea": "GV",
        "MARK_ALTERNATIVES": "0",
        "url": url,
    })
    headers = {
        'User-Agent': user_agent,
        "Content-type": "application/x-www-form-urlencoded",
        "Accept": "text/plain",
    }
    data = None
    retry = 0
    # data stays None until a read succeeds or the function gives up.
    while data is None:
        try:
            # NOTE(review): get() talks to traductor.gencat.cat while this
            # uses traductor.gencat.net -- confirm both hosts are intended.
            conn = httplib.HTTPConnection('traductor.gencat.net')
            try:
                conn.request("POST", "/url.do", params, headers)
                data = conn.getresponse().read()
            finally:
                # The connection was previously left open on every attempt.
                conn.close()
        except KeyboardInterrupt:
            print(u"l'usuari ha cancel·lat")
            data = ""
        except Exception as e:
            print("%s %s" % (e, retry))
            retry += 1
            if retry >= 5:
                data = ""
    return data
def main():
    """Translate one Wikipedia page and save the result to a text file.

    Options, taken from the arguments returned by pywikibot.handleArgs():
    -t:, -títol:, -title:                  page title
    -l:, -llengües:, -langs:, -languages:  translation direction, a key of
                                           oc_dirs or not_oc_dirs
    -a:j                                   ask for the raw JavaScript view
    -a:e                                   ask for the edit view
    The -a option only affects directions handled by get().

    A missing title or direction is asked for interactively. An empty
    answer selects u"Esperanto" and "en-ca" respectively.
    """
    args = pywikibot.handleArgs()
    direction = None
    title = None
    path = "wiki/"
    action = ""
    # Parse the command-line parameters; anything without a leading "-"
    # is ignored.
    for arg in args:
        if not arg.startswith("-"):
            continue
        key, _, value = arg[1:].partition(":")
        if key in ("t", u"títol", "title"):
            title = value
        elif key in ("l", u"llengües", "langs", "languages"):
            direction = value
        elif key == "a":
            if value == "j":
                action = "&action=raw&ctype=text/javascript"
            elif value == "e":
                action = "&action=edit"
            else:
                action = ""
    # NOTE(review): the usage samples at the bottom of the file mention a
    # -p option, but no such option is parsed, so path is always "wiki/".
    if path == "wiki/":
        # After a "wiki/<title>" path the action is the first query
        # parameter, so it has to start with "?" rather than "&".
        action = action.replace("&", "?", 1)
    # Fix wrong or empty parameter values.
    if not title:
        title = pywikibot.input(u"pàgina:") or u"Esperanto"
    # Keep asking until the direction is a known one: an unknown value
    # would otherwise end in a KeyError inside get().
    while direction not in not_oc_dirs and direction not in oc_dirs:
        direction = pywikibot.input(u"llengües:") or "en-ca"
    # Let's run! The patterns are byte strings because the fetched text
    # is only decoded further down.
    if direction in not_oc_dirs:
        txt = post(title, direction)
        # Replace each <textarea> element with its content.
        txt = re.sub(r"<textarea[^>]*?>([^<]*?)</textarea>", r"\1", txt)
    else:
        txt = get(title, direction, path, action)
        # Drop the red <span> wrappers, keeping the text inside them.
        txt = re.sub(r"(<span style='color:red'>)([^<]*?)(</span>)", r"\2", txt)
    if not txt:
        # Nothing was fetched (request cancelled or failed): write no file.
        return
    txt = txt.decode("utf-8")
    txt = pywikibot.html2unicode(txt)
    # Titles may hold "/" (subpages), ":" (namespaces) and other characters
    # that are path separators or invalid in file names on some systems.
    safe_title = re.sub(u'[\\\\/:*?"<>|]', u"_", title)
    filename = u"psq-Gomà-trad-%s-(%s)-%s.txt" % (date("%y%m%d%H%M"), direction, safe_title)
    with codecs.open(filename, "w", 'utf-8') as f:
        f.write(txt)
if __name__ == "__main__":
    # Sample command lines (Catalan to Occitan). Every option needs its
    # leading "-": main() skips any argument that does not start with one.
    #   >traductor.py -l:ca-oc -t:Esperanto
    #   >traductor.py -l:ca-oc -t:Esperanto -a:e
    #   >traductor.py -l:ca-oc -t:Esperanto -a:j
    #   >traductor.py -l:ca-oc -t:Esperanto -p:w
    #   >traductor.py -l:ca-oc -t:Esperanto -p:w -a:e
    #   >traductor.py -l:ca-oc -t:Esperanto -p:w -a:j
    # NOTE(review): main() has no branch for the -p option, so the -p:w
    # variants currently behave like the same lines without it.
    main()