from pygtrans import Translate
from bs4 import BeautifulSoup
from urllib import request
import sys
import feedparser

args = sys.argv
URL = "http://www.mckinsey.com/insights/rss"
BASE = "/home/xxx/www/"


def tran(url=URL, out_dir=BASE + "mckinsey_rss.xml"):
    GT = Translate()
    content = request.urlopen(url).read().decode('utf8')
    # Temporarily rename <title> tags to <stitle>; they are restored after translation.
    content = content.replace('title>', 'stitle>')
    # Wrap the <pubDate> contents in translate="no" spans so the dates pass through untranslated.
    content = content.replace('<pubDate>', '<pubDate><span translate="no">')
    content = content.replace('</pubDate>', '</span></pubDate>')
    _text = GT.translate(content)
    with open(out_dir, 'w', encoding='utf-8') as f:
        c = _text.translatedText
        # Undo the masking applied before translation.
        c = c.replace('stitle>', 'title>')
        c = c.replace('<span translate="no">', '')
        c = c.replace('</span></pubDate>', '</pubDate>')
        f.write(c)
    print("GT: " + url + " > " + out_dir)


if len(args) == 1:
    tran()
else:
    tran(args[1], args[2])


def tran_nature(url="http://rss.acast.com/nature", out_dir=BASE + "nature_rss.xml"):
    GT = Translate()
    html_doc = request.urlopen(url).read().decode('utf8')
    # html.parser lowercases tag names, hence the lowercase 'pubdate' in the replacements below.
    soup = BeautifulSoup(html_doc, 'html.parser')
    # Keep only the first nine <item> entries and drop the rest.
    items = soup.find_all('item')
    for idx, e in enumerate(items):
        if idx > 8:
            e.decompose()
    content = str(soup)
    content = content.replace('title>', 'stitle>')
    content = content.replace('<pubdate>', '<pubDate><span translate="no">')
    content = content.replace('</pubdate>', '</span></pubdate>')
    _text = GT.translate(content)
    with open(out_dir, 'w', encoding='utf-8') as f:
        c = _text.translatedText
        c = c.replace('stitle>', 'title>')
        c = c.replace('<span translate="no">', '')
        c = c.replace('</span></pubdate>', '</pubDate>')
        # Restore '>' characters that come back HTML-escaped ('&gt;') from the translation.
        c = c.replace('&gt;', '>')
        f.write(c)
    print("GT: " + url + " > " + out_dir)


tran_nature()
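Run with no arguments, the script translates the McKinsey feed to BASE + "mckinsey_rss.xml"; passed two arguments, it treats them as the source URL and the output path instead. The pygtrans client behind both functions is simple: Translate().translate() accepts a string and returns a response object whose translatedText attribute holds the result. A minimal sketch of that call on its own, independent of the feeds above and assuming the default client settings and working network access:

from pygtrans import Translate

client = Translate()                            # source language auto-detected by default
resp = client.translate('The quick brown fox')  # single-string call used by the script
print(resp.translatedText)                      # translated text in the client's default target language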