#! /usr/bin/python
# ecashin@meili:~/script/quotations$ ./splitquot < quotations.html | ssh noserose.net 'cat > public_html/e/quotations.xml'
#
# I just spent about 50 minutes debugging this script only to find
# out that it just doesn't work with Python 2.2.3.  At home, with
# version 2.5.2, it works fine.
#
# Reads an HTML page of quotations on stdin and writes an RSS feed to
# stdout.  Only the text between the "### BEGIN QUOTES ###" and
# "### END QUOTES ###" marker lines is used.  The date each quote was
# first seen is remembered in a dbm file named after this script
# (sys.argv[0] + '.db'), keyed by a hash of the quote text.
#
# NOTE(review): the markup inside the string literals and patterns below
# (RSS element names, <blockquote>, <p>, <hr>, &mdash;) was restored from
# the RSS 2.0 element order and from the variable names -- confirm it
# against the published feed and the source page before deploying.

import sys
import signal
import cgi
import re
import pprint
import string
import time
import sha
import anydbm

# One timestamp per run: used for the channel dates and for the date of
# any quote that has not been seen before.
nowtime = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())

# guid -> date string of the run in which that quote first appeared.
qdb = anydbm.open(sys.argv[0] + '.db', 'c')
_qdb_closed = False

# Marker lines delimiting the part of the page that holds the quotes.
start = re.compile('### BEGIN QUOTES ###')
end = re.compile('### END QUOTES ###')

# Markup recognised in the input page.
squot = re.compile('<blockquote>', re.IGNORECASE)
equot = re.compile('</blockquote>', re.IGNORECASE)
hr = re.compile('<hr>', re.IGNORECASE)
empty_par = re.compile(r'<p>\s*</p>', re.IGNORECASE)
dash = re.compile('&mdash; *')

# quotes[i] and comments[i] are lists of input lines: the text of the
# i-th quote and the text that followed it (its attribution).
quotes = []
comments = []


def title(i):
    """Return an XML-escaped, one-line title for quote number i.

    The quote is flattened to a single line, stripped of tags and of
    leading dashes/spaces, cut after the first run of seven words, and
    stripped of trailing punctuation.  Quotes shorter than seven words
    are not cut.
    """
    q = re.sub(r'\n', ' ', ' '.join(quotes[i]))
    q = re.sub(r'\s+', ' ', q)
    q = re.sub(r'^\s+', '', q)
    q = re.sub(r'<.*?>', '', q)
    q = re.sub(r'^[- ]*', '', q)
    # Keep seven "word + separator" groups and drop whatever follows.
    q = re.sub(r'((\w+\W+){7}).*', r'\1', q, 1)
    q = re.sub(r'[-\W]*$', '', q)
    return cgi.escape(q)


def cmt(i):
    """Return comment number i as one string, without its first dash."""
    # Only the first dash is removed; description() supplies its own.
    return dash.sub('', ' '.join(comments[i]), 1)


def description(i):
    """Return the XML-escaped HTML body (quote plus comment) for item i."""
    d = '<blockquote>' + ' '.join(quotes[i]) + '</blockquote>'
    d += '<p>&mdash;' + cmt(i) + '</p>'
    return cgi.escape(d)


def guid(i):
    """Return the SHA-1 hex digest of the text of quote number i."""
    return sha.new(' '.join(quotes[i])).hexdigest()


def item(i):
    """Write the RSS <item> for quote number i to stdout.

    The item's date is the run time at which the quote's guid was first
    stored in qdb; a guid not yet in qdb is stored with the current
    run's timestamp.
    """
    g = guid(i)
    if qdb.has_key(g):
        date = qdb[g]
    else:
        date = nowtime
        qdb[g] = date
    out = sys.stdout.write
    out('  <item>\n')
    out('    <title>' + title(i) + '</title>\n')
    out('    <description>' + description(i) + '</description>\n')
    out('    <pubDate>' + date + '</pubDate>\n')
    # The guid is a hash, not a URL, so it must not be read as a permalink.
    out('    <guid isPermaLink="false">' + g + '</guid>\n')
    out('  </item>\n')


def items():
    """Write one RSS item per quote; exit with status 1 on a mismatch.

    Every quote needs exactly one comment.  The counts differ when a
    quote has no text after it, or when text precedes the first quote.
    """
    if len(quotes) != len(comments):
        sys.stderr.write(
            'splitquot: quote/comment count mismatch: '
            '%d quotes, %d comments\n' % (len(quotes), len(comments)))
        sys.exit(1)
    for i in range(len(quotes)):
        item(i)


def _close_qdb():
    """Close qdb once; later calls do nothing."""
    global _qdb_closed
    if not _qdb_closed:
        _qdb_closed = True
        qdb.close()


def close_db(signo, frame):
    """Signal handler: close the database and exit with status 0."""
    # stdout carries the feed, so the notice goes to stderr.
    sys.stderr.write('closing database\n')
    _close_qdb()
    sys.exit(0)


signal.signal(signal.SIGHUP, close_db)
signal.signal(signal.SIGINT, close_db)

preamble = '''<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Ed Cashin's Intriguing Quotes</title>
<link>http://noserose.net/e/quotations.html</link>
<description>Interesting quotations showing various perspectives.</description>
<language>en-us</language>
<pubDate>''' + nowtime + '''</pubDate>
<lastBuildDate>''' + nowtime + '''</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Custom Stuff by Ed Cashin</generator>
<managingEditor>ecashin@noserose.net</managingEditor>
<webMaster>ecashin@noserose.net</webMaster>
'''

finish = '''</channel>
</rss>
'''

try:
    # Skip everything up to and including the BEGIN marker line.
    for line in sys.stdin:
        if start.search(line):
            break

    buf = []
    for line in sys.stdin:
        if end.search(line):
            break
        elif squot.search(line):
            # Text gathered since the previous quote is its comment.
            if buf:
                comments.append(buf)
            buf = []
        elif equot.search(line):
            quotes.append(buf)
            buf = []
        else:
            line = hr.sub('', line)
            line = empty_par.sub('', line)
            if line != '\n':
                buf.append(line)
    # Whatever follows the last quote is the last comment.
    comments.append(buf)

    #pprint.pprint(comments)
    # pprint.pprint(quotes)
    #sys.exit(0)

    sys.stdout.write(preamble)
    items()
    sys.stdout.write(finish)
finally:
    # Close the database on every exit path, not only from the signal
    # handler, so newly recorded dates are flushed to disk.
    _close_qdb()