BookmarkManager Import Scripts


A collection of scripts to assist with importing bookmarks from other sources into BookmarkManager
 


Firefox import


Usage: parseFirefox.pl bookmarks.html > myLinks.txt; importDelirious.pl myLinks.txt


parseFirefox.pl
#! /usr/bin/perl
#
# $Id: parseFirefox.pl,v 1.1 2006/08/10 04:21:13 brian Exp brian $
#
# parseFirefox.pl - Parses a Firefox bookmarks.html file
#
# Please note that this does not parse the Keywords field, nor does it
# preserve date/time stamps.  It is probably not suitable for
# everyone without further modification, but it will get the basic job
# done of migrating bookmarks from Firefox to BookmarkManager.
#
# Usage: parseFirefox.pl bookmarks.html > myLinks.txt; importDelirious.pl myLinks.txt
#
#####################################################################
require HTML::TreeBuilder;

use strict;
use warnings;

# Main program: for every bookmarks file named on the command line, print
# one record per <a> element in the form read by importDelirious.pl:
#
#   Title: <link text>
#   URI: <link target>
#   Desc: <description, possibly empty>
#   Tags: <space-separated words from title and description>
#
# followed by two blank lines.

# Stop words that must never become tags.
my $dict = build_dict();

# A token boundary is one non-alphanumeric character plus any whitespace
# that follows it.
my $separator = qr/[^A-Za-z0-9]\s*/;

foreach my $filename (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; skip it
    # rather than silently producing no output.
    unless ($tree->parse_file($filename)) {
        warn "Can't parse $filename: $!\n";
        $tree->delete;
        next;
    }
    $tree->elementify();

    foreach my $node ($tree->look_down("_tag", "a")) {
        # Anchors without a link attribute (e.g. <a name="...">) yield an
        # empty list; there is nothing to import for them.
        my $links = $node->extract_links();
        next unless @$links;

        my ($uri, $element) = @{ $links->[0] };
        my $title = $element->as_text();
        print "Title: $title\n";
        print "URI: $uri\n";

        # Get description, if any.  $desc is scoped to this bookmark so
        # that a missing description can never inherit the text (and
        # therefore the tags) of the previous bookmark.
        # NOTE(review): this looks for class="extended" inside the <a>,
        # the same lookup parseDelirious.pl uses -- confirm that Firefox
        # exports actually carry descriptions there.
        my $desc = '';
        my $extended = $node->look_down("class", "extended");
        $desc = $extended->as_text() if $extended;
        print "Desc: $desc\n";

        # Create taglist: lower-cased words of title and description,
        # minus stop words and anything not purely alphanumeric.
        my @rawterms = map { lc } (
            split($separator, $title),
            split($separator, $desc),
        );
        my @terms = grep { filter_terms($_, $dict) } @rawterms;
        my $tags = join(' ', @terms);
        print "Tags: $tags";

        print "\n\n\n";
    }

    # Release the tree explicitly, as the original script did.
    $tree = $tree->delete;
}

# filter_terms($term, $dict) - decide whether a word may be used as a tag.
#
#   $term - candidate word; it is compared in lower case
#   $dict - hash ref whose keys are lower-case stop words
#
# Returns the lower-cased term when it consists only of the characters
# a-z and 0-9 and is not a key of $dict; returns 0 otherwise.  The result
# is meant to be used as a grep predicate.  Note that the term "0" is
# returned unchanged and is therefore false as well.
#
# The sub is declared without a prototype: it takes two arguments, and an
# empty "()" prototype would reject every call compiled after this point.
sub filter_terms {
    my ($term, $dict) = @_;

    return 0 unless defined $term;
    $term = lc $term;

    # Reject empty strings and anything containing punctuation/whitespace.
    return 0 if $term !~ /\A[a-z0-9]+\z/;

    # Reject stop words.
    return 0 if exists $dict->{$term};

    return $term;
}

# build_dict() - build the stop-word dictionary used when generating tags.
#
# Takes no arguments.  Returns a reference to a hash whose keys are common
# English words and whose values are all 1.  Keys are stored in lower
# case because filter_terms() lower-cases every term before looking it
# up; without that, the entry "I" could never match.  The word list
# contains repeated entries, which simply collapse into one key.
sub build_dict {
    # Source: http://www.andreas.com/faq-thousandwords.html
    my $common_words =
    "the, to, of, a, I, and, is, in, that, it, for, you, on, be, have, are,
    with, not, this, or, as, was, but, at, in, from, by, an, if, they,
    about, would, can, one, my, will, all, do, has, like, there, me, out,
    your, what, which, some, so, we, more, who, any, up, get, am, if,
    just, he, no, other, people, know, only, their, than, this, it, think,
    when, them, been, time, had, were, and, note, his, should, use, then,
    also, good, how, could, way, very, into, much, make, because, these,
    see, may, as, even, you, two, want, most, new, many, well, such,
    really, first, same, those, our, now, say, work, being, used, too,
    anyone, here, where, over, what, right, but, problem, did, something,
    go, there, her, back, file, we, still, need, said, find, off, him,
    after, point, before, take, us, going, they, might, since, never,
    better, read, name, got, long, someone, she, why, last, few, all, my,
    number, must, using, own, little, made, down, believe, he, so, while,
    line, both, around, another, through, for, thing, without, case, also,
    no, between, year, set, sure, probably, enough, different, least,
    group, else, put, lot, direct, each, information, part, how, any,
    question, old, real, course, anything, fact, when, best, call, end,
    give, help, demand, at, is, come, called, person, either, under, run,
    try, done, though, always, list, look, news, world, thought, far,
    again, available, seen, quite, rather, to, less, life, one, day,
    great, found, tell, women, every, ever, against, place, after,
    general, having, mean, above, heard, thanks, doing, able, high, from,
    next, state, change, book, now, talk, well, new, possible, please,
    bad, seem, man, following, send, example, several, computer, reason,
    that, trying, getting, true, feel, wrong, type, let, stuff, keep,
    hard, left, idea, show, power, remember, looking, why, until, game,
    local, ago, others, car, are, actually, three, four, five, six, seven,
    eight, nine, ten, yet, message, away, machine, interested, kind,
    large, sun, already, order, small, government, space, free, running,
    first, second, third, fourth, fifth, however, money, nothing, home,
    level, music, start, issue, men, an, whether, given, test, user, big,
    pretty, address, once, agree, area, include, write, mind, experience,
    memory, original, of, discussion, word, god, understand, matter, not,
    during, play, standard, making, hand, copy, whole, do, human,
    interesting, just, cannot, yes, often, disk, side, maybe, these, nice,
    came, public, some, source, dave, guess, hourly, open, almost, full,
    buy, important, response, ask, return, simply, mark, went, hope, told,
    tried, wanted, story, process, saying, form, another, love, couple,
    law, answer, live, city, since, working, country, sort, major,
    everyone, cost, care, word, usually, company, water, reading,
    actually, instead, job, written, size, or, single, sense, pay,
    language, short, lines, then, question, certainly, later, anyone,
    note, speed, saw, similar, week, can, light, friend, certain,
    difference, including, myself, response, hear, within, however, add,
    correct, science, become, text, center, top, asked, error, known,
    perhaps, consider, sound, easy, price, started, especially, rights,
    stop, rest, everything, talking, local, recently, whatever,
    particular, half, low, simple, define, network, subject, except,
    provide, class, fine, check, woman, took, interest, along, she, turn,
    due, clear, close, past, children, by, phone, argument, various,
    result, although, opinion, worth, mode, together, mine, night, cause,
    common, effect, position, maybe, head, likely, itself, situation,
    unless, special, move, window, leave, allow, box, anyway, yes, sent,
    personal, self, mentioned, claim, taken, record, future, function,
    child, because, field, exactly, longer, view, four, most, happen,
    expect, room, changed, front, today, rate, business, recent, with,
    movie, main, needed, screen, wrote, anyway, early, product, issue,
    performance, your, lost, anybody, page, looks, amount, house, who,
    first, wish, gun, root, market, statement, necessary, fun, design,
    month, thinking, date, history, happened, state, soon, break, death,
    card, legal, choice, evidence, minute, war, body, taking, even, idea,
    yourself, perhaps, release, involved, format, useful, although,
    writing, chance, while, black, assume, upon, kill, received, required,
    playing, output, week, cup, air, radio, willing, change, near,
    complete, here, reasons, played, vote, present, related, tv,
    political, quality, currently, environment, string, learn, paper,
    color, hold, advance, fast, force, cut, considered, sometimes,
    difficult, outside, album, save, specific, completely, doubt, food,
    folk, total, site, show, normal, directly, white, among, coming,
    family, religion, supposed, solution, culture, dead, development,
    reasonable, create, decided, appropriate, knowledge, behind, exist,
    suggest, buffer, science, action, entire, below, has"
;
    # The separator pattern also swallows the newline and indentation
    # that follow a comma at the end of each line of the literal.
    my %dict = map { ( lc($_) => 1 ) }
               grep { length }
               split(/,\s*/, $common_words);
    return \%dict;
}


de.lirio.us import


parseDelirious.pl file1.html file2.html file3.html > myLinks.txt


Change the $base_url, $wikiname, and $password global vars in importDelirious.pl, then run against the file created in the previous step:

importDelirious.pl myLinks.txt


parseDelirious.pl
#! /usr/bin/perl
#
# $Id: parseDelirious.pl,v 1.2 2006/05/30 03:27:21 brian Exp brian $
#
# parseDelirious.pl - Parses a de.lirio.us screen dump (as saved by
# Firefox)
#
#####################################################################
require HTML::TreeBuilder;

use strict;
use warnings;

# Main program: for every saved de.lirio.us page named on the command
# line, print one record per element of class "xfolkentry" in the form
# read by importDelirious.pl:
#
#   Title: <link text>
#   URI: <link target>
#   Desc: <description, possibly empty>
#   Tags: <tags, each followed by a space, or "@private">
#
# followed by two blank lines.
foreach my $filename (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; skip it
    # rather than silently producing no output.
    unless ($tree->parse_file($filename)) {
        warn "Can't parse $filename: $!\n";
        $tree->delete;
        next;
    }
    $tree->elementify();

    foreach my $entry ($tree->look_down("class", "xfolkentry")) {
        # The bookmark is the first link found in the "uri" element.  An
        # entry lacking that element or link cannot be imported, so it is
        # reported and skipped instead of aborting the whole run.
        my $uri_node = $entry->look_down("class", "uri");
        my $links = $uri_node ? $uri_node->extract_links() : [];
        unless (@$links) {
            warn "Skipping entry without a link in $filename\n";
            next;
        }
        my ($uri, $anchor) = @{ $links->[0] };
        my $title = $anchor->as_text();
        print "Title: $title\n";
        print "URI: $uri\n";

        # Get description, if any
        my $extended = $entry->look_down("class", "extended");
        my $desc = $extended ? $extended->as_text() : '';
        print "Desc: $desc\n";

        # Collect the text of the first link of every "tag" element;
        # tag elements that carry no link are ignored.
        my @tags;
        foreach my $tagnode ($entry->look_down("class", "tag")) {
            my $taglinks = $tagnode->extract_links();
            next unless @$taglinks;
            push @tags, $taglinks->[0]->[1]->as_text();
        }

        # A de.lirio.us export quirk prevents some tags from
        # displaying; default these to "@private" for later review
        print "Tags: ";
        if (@tags) {
            print "$_ " foreach @tags;
        }
        else {
            print "\@private";
        }
        print "\n\n\n";
    }

    # Release the tree explicitly, as the original script did.
    $tree = $tree->delete;
}


importDelirious.pl
#! /usr/bin/perl
#
# $Id: importDelirious.pl,v 1.3 2006/05/31 04:43:19 brian Exp brian $
#
# importDelirious.pl -- Imports file created by parseDelirious.pl
#
# Usage: importDelirious.pl exportFile
#
#####################################################################

require LWP::UserAgent;
require HTTP::Cookies;

use strict;
use warnings;

###Global###
# Site-specific settings: edit these before running the script.
my $base_url = "http://some.url.com/wiki/";
my $bookmark_page = "Bookmarks";
my $wikiname = "YourName";
my $password = "yourpassword";

# The export file is the only command-line argument.
my $export_file = $ARGV[0];
die "Usage: importDelirious.pl exportFile\n" unless defined $export_file;

# The user agent stores cookies from every response in the jar itself,
# so the session cookie set at login is sent with the later requests.
my $ua = LWP::UserAgent->new;
my $cookie_jar = HTTP::Cookies->new(file => "lwpcookies.txt",
                                    autosave => 1);
$ua->cookie_jar($cookie_jar);

# Login.  Passing the fields as a list lets LWP URL-encode them, so
# names and passwords containing "&", "=", "+" or "%" arrive intact.
my $login_url = $base_url."wikka.php?wakka=UserSettings";
my $login_res = $ua->post($login_url, [
    name     => $wikiname,
    password => $password,
    action   => 'login',
    wakka    => 'UserSettings',
]);
# Only 4xx/5xx responses count as failure; a redirect is not an error.
die "Login failed: " . $login_res->status_line . "\n"
    if $login_res->is_error;

# Import
open(my $in, '<', $export_file)
    or die "Can't open $export_file for reading: $!\n";
# Set autoflush so progress is displayed
my $oldfh = select(STDOUT); $| = 1; select($oldfh);

# The target URL is the same for every bookmark.
my $add_url = $base_url."wikka.php?wakka=".$bookmark_page."&action=add";

print "Importing...";
RECORD: while (defined(my $line = <$in>)) {
    # A record starts at a "Title: " line; everything else between
    # records (blank lines) is skipped.
    next RECORD unless $line =~ /^Title: /;

    # The three lines after the title hold URI, description and tags,
    # in that order.
    my @rest = map { scalar <$in> } 1 .. 3;
    if (grep { !defined } @rest) {
        warn "\nIncomplete record at end of $export_file; skipped\n";
        last RECORD;
    }
    my ($title, $uri, $desc, $tags) = map { _field_value($_) } $line, @rest;

    # One dot per bookmark sent.
    print ".";
    my $res = $ua->post($add_url, [
        title => $title,
        uri   => $uri,
        desc  => $desc,
        tags  => $tags,
    ]);
    warn "\nFailed to import '$title': " . $res->status_line . "\n"
        if $res->is_error;
}
close($in);
print "done!\n";

# _field_value($line) - return the value part of a "Label: value" line.
#
# Only the first ": " separates label and value, so values that contain
# ": " themselves are kept whole.  A line with no value yields the empty
# string.  Trailing whitespace, including the line ending, is removed.
sub _field_value {
    my ($line) = @_;
    return '' unless defined $line;
    chomp $line;
    my (undef, $value) = split(/: /, $line, 2);
    $value = '' unless defined $value;
    $value =~ s/\s+\z//;
    return $value;
}
There are no comments on this page.
Valid XHTML :: Valid CSS: :: Powered by WikkaWiki