#!/usr/bin/python # -*- coding: utf-8 -*- # ~/bin/cron/cron-bookmarks # translate the Firefox (once Netscape) bookmarks database (once HTML list) # into something public, hiding items whose description begins with # the word HIDDEN # Notes: # Python UTF-8 handling is really, really annoying. # And don't even get me started about not being able to put it in doc strings. # Author C. Alex. North-Keys # Incept Fri Jan 24 18:06:36 CST 1997 # Update 2002-05-11 02:54:45 CDT (May Sat) += mozilla, more perl impl. # Update 2006-08-03 01:03:16 CDT (Aug Thu) bookmarks via link in .mozilla # Update 2009-03-21 02:44:35 CDT (Mar Sat) translation to Python from __future__ import with_statement import os import sys import shutil import subprocess import sqlite3 import lxml import lxml.builder hide_hiddens = True query = ('SELECT ' 'id, ' 'main.type AS kind, ' 'main.position AS position, ' '(SELECT title FROM moz_bookmarks AS other ' ' WHERE other.id = main.parent) AS parent, ' 'main.parent AS parentid, ' '(SELECT url FROM moz_places AS places ' ' WHERE places.id = main.fk) AS uri, ' 'main.title AS name, ' '(SELECT content FROM moz_items_annos AS items ' # 3 = text, 2 = time? ' WHERE items.item_id = main.id AND items.type = 3) AS note ' 'FROM moz_bookmarks AS main;') # INPUT home = os.environ['HOME'] bookmarks_sqlite = os.environ['BOOKMARKS'] if os.environ.has_key('BOOKMARKS') \ else os.path.join(home, '.mozilla', 'places.sqlite') bookmarks_sqlite_tmp = '/tmp/' + str(os.getpid()) # OUTPUT - full wwwpriv = 'www/@auth/private' bookmarks_priv_xml_out = os.path.join(home, wwwpriv, 'bookmarks.xml') bookmarks_priv_html_out = os.path.join(home, wwwpriv, 'bookmarks-core.shtml') # OUTPUT - no HIDDENS bookmarks_pub_xml_out = os.path.join(home, 'www', 'bookmarks.xml') bookmarks_pub_html_out = os.path.join(home, 'www', 'bookmarks-core.shtml') shutil.copy(bookmarks_sqlite, bookmarks_sqlite_tmp) x = lxml.builder.ElementMaker() xt_top = x('folders', x('title', 'root')) xt_top.attrib['title'] = 'root' db = sqlite3.connect(bookmarks_sqlite_tmp) cur = db.cursor() cur.execute(query) # =0============================================ parsing ====== nodes = {} # none? none? none? none? for (id, kind, position, folder, folderid, url, title, note) in cur: #(1, 2, 0, None, 0, None, u'', None) #(11, 2, 1, u'Book...', 3, None, u'Acts', None) #(55, 1, 5, u'\u30...', 52, u'...', u'About the Logo', None) if folder is None: folder = '' if url is None: url = '' if title is None: title = '' if note is None: note = '' nodetype = None if 1 == kind: nodetype = 'bookmark' elif 2 == kind: nodetype = 'folder' elif 3 == kind: continue # nodetype = 'mystery'? just skipping else: print 'KIND error: %r' % (kind, ) xt_node = x(nodetype, x('title', title)) if 'bookmark' == nodetype: xt_url = x('url', url) xt_node.append(xt_url) if (0 < len(note)): xt_note = x('note', note) xt_node.append(xt_note) xt_node.attrib['title'] = title nodes[id] = (xt_node, nodetype, id, folderid, position) # =1=========================================== foldering ===== bookmarks_by_folder_id = {} # bookmarks_by_folder_id[id] = [] of bookmarks folders = {} # folders[id] = a folder folders[0] = xt_top for (id, (xt_node, nodetype, id, parent_id, position)) in nodes.iteritems(): if not bookmarks_by_folder_id.has_key(parent_id): bookmarks_by_folder_id[parent_id] = list() bookmarks_by_folder_id[parent_id].append((xt_node, position)) if 'folder' == nodetype: folders[id] = xt_node # =2============================================ ordering ===== def sort_on_int2nd_of_tuple(a, b): (a_ign, a_sort) = a (b_ign, b_sort) = b return cmp(int(a_sort), int(b_sort)) for id in bookmarks_by_folder_id.iterkeys(): bookmarks_by_folder_id[id].sort(sort_on_int2nd_of_tuple) for (id, stuff) in bookmarks_by_folder_id.iteritems(): for (xt_node, _) in stuff: folders[id].append(xt_node) xt_top.append(folders[1]) # =3=========================================== stripping ===== def xml2html_note_check(xml_node_check, xml_node_target): xt_note = xml_node_check.find('note') if xt_note is not None: note = xt_note.text if 0 < len(note): xt_html_note = x('div', note) xt_html_note.attrib['class'] = 'note' xml_node_target.append(xt_html_note) def xml2html_folder(xml_folder): '''Returns the html_folder node derived from the XML_FOLDER.''' title = xml_folder.find('title').text html_xt_folder = x('div', x('h3', title)) xml2html_note_check(xml_folder, html_xt_folder) html_xt_list = x('ul') for xml_node in xml_folder: html_xt_node = None if 'folder' == xml_node.tag: html_xt_node = x('li', xml2html_folder(xml_node)) elif 'bookmark' == xml_node.tag: url = xml_node.find('url').text title = xml_node.find('title').text xt_note = xml_node.find('note') html_xt_a = x('a', title) html_xt_a.attrib['href'] = url html_xt_node = x('li', html_xt_a) xml2html_note_check(xml_node, html_xt_node) elif 'title' == xml_node.tag: pass # already handled elif 'note' == xml_node.tag: pass # not using these else: sys.stderr.write('oops on type %s\n' % (xml_node.tag,)) if html_xt_node is not None: html_xt_list.append(html_xt_node) if 0 < len(html_xt_list): html_xt_folder.append(html_xt_list) return html_xt_folder xt_bookmarks_toolbar = xt_top.xpath( \ './/folder/title[text()="Bookmarks Toolbar"]/..') if xt_bookmarks_toolbar is not None: xt_top = xt_bookmarks_toolbar[0] # private version with hidden stuff xml_full = ('\n' + lxml.etree.tostring(xt_top, pretty_print=True)) html_full = lxml.etree.tostring(xml2html_folder(xt_top), pretty_print=True) # remove hidden stuff if hide_hiddens: for xt_node in xt_top.xpath('.//note[starts-with(text(),"HIDDEN")]/..'): xt_parent = xt_node.getparent() if xt_parent is not None: xt_parent.remove(xt_node) # public version xml = ('\n' + lxml.etree.tostring(xt_top, pretty_print=True)) html = lxml.etree.tostring(xml2html_folder(xt_top), pretty_print=True) # create/update the bookmark files with open(bookmarks_priv_xml_out, 'w') as output: output.write(xml_full) # autoconvert with open(bookmarks_priv_html_out, 'w') as output: output.write(html_full) with open(bookmarks_pub_xml_out, 'w') as output: output.write(xml) # autoconvert with open(bookmarks_pub_html_out, 'w') as output: output.write(html) #------------------------------------------------------------eof