#! /usr/bin/python
# Example invocation:
# ecashin@meili:~/script/quotations$ ./splitquot < quotations.html | ssh noserose.net 'cat > public_html/e/quotations.xml'
#
# I just spent about 50 minutes debugging this script only to find
# out that it just doesn't work with Python 2.2.3.  At home, with
# version 2.5.2, it works fine.
#
# NOTE(review): this copy of the script appears to have been damaged in
# transit: statements were collapsed onto a few long lines and several
# string/regex literals look like markup was stripped out of them.  The
# line structure below is a reconstruction; damaged fragments are kept
# verbatim and flagged individually.  Restore them from a clean copy.
import sys, signal, cgi, re, pprint, string, time, sha, anydbm

# Timestamp of this run (GMT), used as the date for quotes seen for the
# first time (see item()).
nowtime = time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime())

# Persistent guid -> date store kept next to the script itself
# (path is sys.argv[0] + '.db'; opened with anydbm flag 'c').
qdb = anydbm.open(sys.argv[0] + '.db', 'c')


# Return a short, escaped, single-line title for quote number i.
#   i -- index into the module-level `quotes` list.
def title(i):
    # Flatten the quote's buffered lines into one line of text.
    q = re.sub(r'\n', ' ', string.join(quotes[i]))
    q = re.sub(r'\s+', ' ', q)      # collapse whitespace runs
    q = re.sub(r'^\s+', '', q)      # drop leading whitespace
    q = re.sub(r'<.*?>', '', q)     # drop anything that looks like a tag
    q = re.sub(r'^[- ]*', '', q)    # drop leading dashes and spaces
    # Cut the text off after the first seven word+separator runs; text
    # with fewer than seven is left unchanged because nothing matches.
    q = re.sub('((\w+\W+){7}).*', r'\1', q, 1)
    q = re.sub(r'[-\W]*$', '', q)   # trim trailing punctuation/space
    return cgi.escape(q)


# Return the comment text for entry i with its first dash marker removed.
#   i -- index into the module-level `comments` list.
def cmt(i):
    c = string.join(comments[i])
    # NOTE(review): the pattern contains a literal em dash; it may
    # originally have been an HTML entity -- confirm against a clean copy.
    c = re.sub('— *', '', c, 1)
    return c


# Return the escaped description for entry i: the joined quote text
# followed by the attribution produced by cmt(i).
def description(i):
    # NOTE(review): the string literals below contain raw line breaks and
    # an empty string, which suggests wrapper markup was stripped from
    # them.  They are reproduced exactly as found -- TODO restore.
    d = '
' + string.join(quotes[i]) + ''
    d += '
—' + cmt(i) + '
'
    return cgi.escape(d)


# Return the hex digest (from the `sha` module) of the joined quote text.
# item() uses this value as the key into qdb.
def guid(i):
    return sha.new(string.join(quotes[i])).hexdigest()


# Emit entry i.  A guid already recorded in qdb keeps its stored date;
# a new guid is recorded with the time of this run.
def item(i):
    g = guid(i)
    date = nowtime
    if qdb.has_key(g):
        date = qdb[g]
    else:
        qdb[g] = date
    # NOTE(review): everything from the print statement below up to the
    # tail of some re.compile(...) call is missing in this copy.  That gap
    # presumably also held the definitions of `end` and `squot`, which the
    # main loop uses but which are not assigned anywhere in the visible
    # text.  The fragment is kept verbatim -- TODO restore.
    print '', re.IGNORECASE)
# NOTE(review): the patterns for equot, hr and empty_par look damaged
# (empty or partial, with raw line breaks) -- TODO restore.
equot = re.compile('', re.IGNORECASE)
hr = re.compile('?hr>', re.IGNORECASE)
empty_par = re.compile(r'
\s*
', re.IGNORECASE)
# Not referenced in the visible part of the script.
dash = re.compile('— *')

quotes = []     # one list of lines per quote
comments = []   # one list of lines per comment
buf = []        # lines collected since the last marker

# Split stdin into quotes and comments.  Lines buffered when `equot`
# matches become a quote; lines buffered when `squot` matches, or left
# over when input ends, become a comment.
# NOTE(review): the nesting below is reconstructed from a collapsed
# source line; in particular the placement of the final `if` under the
# `else` branch is an assumption -- confirm against a clean copy.
for line in sys.stdin:
    if end.search(line):
        break
    elif squot.search(line):
        if len(buf) != 0:
            comments.append(buf)
            buf = []
    elif equot.search(line):
        quotes.append(buf)
        buf = []
    else:
        # Remove hr / empty_par matches, then keep the line unless
        # nothing but a newline is left.
        line = hr.sub('', line)
        line = empty_par.sub('', line)
        if line != '\n':
            buf.append(line)
# Whatever is still buffered after the loop is stored as a comment.
comments.append(buf)

# Debugging aids, left disabled:
#pprint.pprint(comments)
# pprint.pprint(quotes)
#sys.exit(0)

# The triple-quoted string opened on the next line continues past it.
preamble = '''