BookmarkManagerImportScripts:Wikka

BookmarkManager Import Scripts

A collection of scripts to assist with importing bookmarks from other sources into BookmarkManager

Firefox import

Parses a Firefox-generated bookmarks.html file into output suitable for processing by importDelirious.pl (see below). This is a very crude script that creates tags from the title and description text (but not from the Firefox keywords). It could stand some refinement, but it got the job done for me (more than 400 Firefox bookmarks).

Usage: parseFirefox.pl bookmarks.html > myLinks.txt; importDelirious.pl myLinks.txt

parseFirefox.pl

#! /usr/bin/perl
#
# $Id: parseFirefox.pl,v 1.1 2006/08/10 04:21:13 brian Exp brian $
#
# parseFirefox.pl - Parses a Firefox bookmarks.html file
#
# Please note that this does not parse the Keywords field, nor does it
# preserve date/time stamps. It is probably not suitable for
# everyone without further modification, but it will get the basic job
# done of migrating bookmarks from Firefox to BookmarkManager.
#
# Usage: parseFirefox.pl bookmarks.html > myLinks.txt; importDelirious.pl myLinks.txt
#
#####################################################################
require HTML::TreeBuilder;

use strict;
use warnings;

# Stop-word lookup (hashref) used to keep common English words out of the
# generated tag list.
my $dict = build_dict();

# Every command-line argument is treated as a bookmarks file. Each <a>
# element found in it is printed as a four-line record (Title, URI, Desc,
# Tags) followed by two blank lines.
foreach my $filename (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; report it
    # instead of silently producing no output for that file.
    unless ($tree->parse_file($filename)) {
        warn "Can't parse $filename: $!\n";
        $tree->delete;
        next;
    }
    $tree->elementify();

    foreach my $node ($tree->look_down("_tag", "a")) {
        my $links = $node->extract_links();

        # An anchor without any link attribute has nothing to import, and
        # dereferencing the missing entry would abort the whole run.
        next unless @$links;

        my ($uri, $anchor) = @{ $links->[0] };
        my $title = $anchor->as_text();
        print "Title: $title\n";
        print "URI: $uri\n";

        # Get description, if any. The variable is scoped to this bookmark
        # so a bookmark without a description never inherits the text (and
        # therefore the tags) of the previous one.
        my $desc      = '';
        my $desc_node = $node->look_down("class", "extended");
        $desc = $desc_node->as_text() if $desc_node;
        print "Desc: $desc\n";

        # Create taglist: break title and description into lower-cased
        # words and drop everything filter_terms rejects.
        my @rawterms = map { lc $_ } (
            split(/[^A-Za-z0-9]\s*/, $title),
            split(/[^A-Za-z0-9]\s*/, $desc),
        );
        my @terms = grep { filter_terms($_, $dict) } @rawterms;
        my $tags  = join(' ', @terms);
        print "Tags: $tags\n\n\n";
    }

    # Release the parse tree explicitly before moving to the next file.
    $tree->delete;
}

# filter_terms($term, $dict)
#
# Decides whether a candidate word may be used as a tag.
#
#   $term - the candidate word; it is lower-cased before checking
#   $dict - hashref whose keys are the words to reject
#
# Returns the lower-cased term when it is acceptable, or 0 when the term is
# undefined, is a key of $dict, or is not made up solely of the characters
# a-z and 0-9. Note that callers testing the result for truth (e.g. grep)
# will also treat the literal term "0" as rejected.
#
# The sub is declared without a prototype: it takes two arguments, so an
# empty "()" prototype would make correctly written calls fail to compile
# whenever the definition is seen before the call.
sub filter_terms {
    my ($term, $dict) = @_;

    return 0 unless defined $term;
    $term = lc $term;

    return 0 if exists $dict->{$term};

    # \A and \z anchor the whole string; "$" would accept a trailing newline.
    return 0 unless $term =~ /\A[a-z0-9]+\z/;

    return $term;
}

# build_dict()
#
# Builds the stop-word lookup used when generating tags.
#
# Takes no arguments. Returns a hashref whose keys are the words of the
# list below, lower-cased, each mapped to 1. The keys are lower-cased
# because the terms looked up in this table are lower-cased first; without
# that the capitalised entry "I" could never match. Words that appear more
# than once in the list simply collapse into a single key.
sub build_dict {
# Source: http://www.andreas.com/faq-thousandwords.html
my $common_words =
"the, to, of, a, I, and, is, in, that, it, for, you, on, be, have, are,
with, not, this, or, as, was, but, at, in, from, by, an, if, they,
about, would, can, one, my, will, all, do, has, like, there, me, out,
your, what, which, some, so, we, more, who, any, up, get, am, if,
just, he, no, other, people, know, only, their, than, this, it, think,
when, them, been, time, had, were, and, note, his, should, use, then,
also, good, how, could, way, very, into, much, make, because, these,
see, may, as, even, you, two, want, most, new, many, well, such,
really, first, same, those, our, now, say, work, being, used, too,
anyone, here, where, over, what, right, but, problem, did, something,
go, there, her, back, file, we, still, need, said, find, off, him,
after, point, before, take, us, going, they, might, since, never,
better, read, name, got, long, someone, she, why, last, few, all, my,
number, must, using, own, little, made, down, believe, he, so, while,
line, both, around, another, through, for, thing, without, case, also,
no, between, year, set, sure, probably, enough, different, least,
group, else, put, lot, direct, each, information, part, how, any,
question, old, real, course, anything, fact, when, best, call, end,
give, help, demand, at, is, come, called, person, either, under, run,
try, done, though, always, list, look, news, world, thought, far,
again, available, seen, quite, rather, to, less, life, one, day,
great, found, tell, women, every, ever, against, place, after,
general, having, mean, above, heard, thanks, doing, able, high, from,
next, state, change, book, now, talk, well, new, possible, please,
bad, seem, man, following, send, example, several, computer, reason,
that, trying, getting, true, feel, wrong, type, let, stuff, keep,
hard, left, idea, show, power, remember, looking, why, until, game,
local, ago, others, car, are, actually, three, four, five, six, seven,
eight, nine, ten, yet, message, away, machine, interested, kind,
large, sun, already, order, small, government, space, free, running,
first, second, third, fourth, fifth, however, money, nothing, home,
level, music, start, issue, men, an, whether, given, test, user, big,
pretty, address, once, agree, area, include, write, mind, experience,
memory, original, of, discussion, word, god, understand, matter, not,
during, play, standard, making, hand, copy, whole, do, human,
interesting, just, cannot, yes, often, disk, side, maybe, these, nice,
came, public, some, source, dave, guess, hourly, open, almost, full,
buy, important, response, ask, return, simply, mark, went, hope, told,
tried, wanted, story, process, saying, form, another, love, couple,
law, answer, live, city, since, working, country, sort, major,
everyone, cost, care, word, usually, company, water, reading,
actually, instead, job, written, size, or, single, sense, pay,
language, short, lines, then, question, certainly, later, anyone,
note, speed, saw, similar, week, can, light, friend, certain,
difference, including, myself, response, hear, within, however, add,
correct, science, become, text, center, top, asked, error, known,
perhaps, consider, sound, easy, price, started, especially, rights,
stop, rest, everything, talking, local, recently, whatever,
particular, half, low, simple, define, network, subject, except,
provide, class, fine, check, woman, took, interest, along, she, turn,
due, clear, close, past, children, by, phone, argument, various,
result, although, opinion, worth, mode, together, mine, night, cause,
common, effect, position, maybe, head, likely, itself, situation,
unless, special, move, window, leave, allow, box, anyway, yes, sent,
personal, self, mentioned, claim, taken, record, future, function,
child, because, field, exactly, longer, view, four, most, happen,
expect, room, changed, front, today, rate, business, recent, with,
movie, main, needed, screen, wrote, anyway, early, product, issue,
performance, your, lost, anybody, page, looks, amount, house, who,
first, wish, gun, root, market, statement, necessary, fun, design,
month, thinking, date, history, happened, state, soon, break, death,
card, legal, choice, evidence, minute, war, body, taking, even, idea,
yourself, perhaps, release, involved, format, useful, although,
writing, chance, while, black, assume, upon, kill, received, required,
playing, output, week, cup, air, radio, willing, change, near,
complete, here, reasons, played, vote, present, related, tv,
political, quality, currently, environment, string, learn, paper,
color, hold, advance, fast, force, cut, considered, sometimes,
difficult, outside, album, save, specific, completely, doubt, food,
folk, total, site, show, normal, directly, white, among, coming,
family, religion, supposed, solution, culture, dead, development,
reasonable, create, decided, appropriate, knowledge, behind, exist,
suggest, buffer, science, action, entire, below, has";

    # The separator pattern swallows the line breaks that follow the commas
    # inside the literal, so each key is a bare word.
    my %dict = map { ( lc($_) => 1 ) }
               grep { length }
               split(/,\s*/, $common_words);
    return \%dict;
}

de.lirio.us import

The two Perl scripts that follow can be used to import bookmarks from de.lirio.us. Use your browser to save a page of bookmarks as an HTML file (you might have to save multiple pages; that's OK, the script can handle it). Export the HTML data into text format:

parseDelirious.pl file1.html file2.html file3.html > myLinks.txt

Change the $base_url, $wikiname, and $password global vars in importDelirious.pl, then run against the file created in the previous step:

importDelirious.pl myLinks.txt

parseDelirious.pl

#! /usr/bin/perl
#
# $Id: parseDelirious.pl,v 1.2 2006/05/30 03:27:21 brian Exp brian $
#
# parseDelirious.pl - Parses a de.lirio.us screen dump (as saved by
# Firefox)
#
#####################################################################
require HTML::TreeBuilder;

use strict;
use warnings;

# Every command-line argument is treated as a saved page of bookmarks. Each
# element whose class is "xfolkentry" is printed as a four-line record
# (Title, URI, Desc, Tags) followed by two blank lines.
foreach my $filename (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef when the file cannot be opened; report it
    # instead of silently producing no output for that file.
    unless ($tree->parse_file($filename)) {
        warn "Can't parse $filename: $!\n";
        $tree->delete;
        next;
    }
    $tree->elementify();

    foreach my $entry ($tree->look_down("class", "xfolkentry")) {
        # An entry without a "uri" element, or whose element carries no
        # link, cannot be imported; skip it rather than abort on an
        # undefined value.
        my $uri_node = $entry->look_down("class", "uri");
        next unless $uri_node;
        my $links = $uri_node->extract_links();
        next unless @$links;

        my ($uri, $anchor) = @{ $links->[0] };
        my $title = $anchor->as_text();
        print "Title: $title\n";
        print "URI: $uri\n";

        # Get description, if any
        my $desc      = '';
        my $desc_node = $entry->look_down("class", "extended");
        $desc = $desc_node->as_text() if $desc_node;
        print "Desc: $desc\n";

        # Collect the text of every tag link in the entry.
        my @tags;
        foreach my $tag_node ($entry->look_down("class", "tag")) {
            my $tag_links = $tag_node->extract_links();
            next unless @$tag_links;
            push @tags, $tag_links->[0]->[1]->as_text();
        }

        # A de.lirio.us export quirk prevents some tags from
        # displaying; default these to "@private" for later review
        print "Tags: ";
        if (@tags) {
            # Each tag is followed by a single space, as in earlier output.
            print "$_ " foreach @tags;
        }
        else {
            print "\@private";
        }
        print "\n\n\n";
    }

    # Release the parse tree explicitly before moving to the next file.
    $tree->delete;
}

importDelirious.pl

#! /usr/bin/perl
#
# $Id: importDelirious.pl,v 1.3 2006/05/31 04:43:19 brian Exp brian $
#
# importDelirious.pl -- Imports file created by parseDelirious.pl
#
# Usage: importDelirious.pl exportFile
#
#####################################################################

require LWP::UserAgent;
require HTTP::Cookies;

use strict;
use warnings;

###Global###
# Edit these four settings before running the script.
my $base_url      = "http://some.url.com/wiki/";
my $bookmark_page = "Bookmarks";
my $wikiname      = "YourName";
my $password      = "yourpassword";

die "Usage: importDelirious.pl exportFile\n" unless @ARGV;

my $ua = LWP::UserAgent->new;
my $cookie_jar = HTTP::Cookies->new(file     => "lwpcookies.txt",
                                    autosave => 1);
$ua->cookie_jar($cookie_jar);

# Login
# The form is passed as a list of key/value pairs so that LWP URL-encodes
# every value; a password containing "&", "=" or "+" would otherwise corrupt
# the request body.
my $login_url = $base_url."wikka.php?wakka=UserSettings";
my $res = $ua->post($login_url, [
    name     => $wikiname,
    password => $password,
    action   => 'login',
    wakka    => 'UserSettings',
]);
unless ($res->is_success || $res->is_redirect) {
    warn "Login request failed: ", $res->status_line, "\n";
}
$cookie_jar->extract_cookies($res);

# Import
open(my $in, '<', $ARGV[0])
    or die "Can't open $ARGV[0] for reading: $!\n";

# Set autoflush so progress is displayed
$| = 1;
print "Importing...";

# The target URL is the same for every bookmark, so build it once.
my $add_url = $base_url."wikka.php?wakka=".$bookmark_page."&action=add";

# A record is a "Title: " line followed by the URI, Desc and Tags lines;
# every other line (the blank separators) is skipped.
while (my $line = <$in>) {
    print ".";
    next unless $line =~ /^Title: /;

    my $title = field_value($line);
    my $uri   = field_value(scalar <$in>);
    my $desc  = field_value(scalar <$in>);
    my $tags  = field_value(scalar <$in>);

    # Values are URL-encoded by LWP, so titles and descriptions containing
    # "&", "=" or spaces arrive intact.
    my $add_res = $ua->post($add_url, [
        title => $title,
        uri   => $uri,
        desc  => $desc,
        tags  => $tags,
    ]);
    unless ($add_res->is_success || $add_res->is_redirect) {
        warn "\nFailed to add $uri: ", $add_res->status_line, "\n";
    }
}
close($in);
print "done!\n";

# field_value($line)
#
# Returns everything after the leading "Label: " of a record line, without
# the line ending. Only the first ": " separates label from value, so values
# that themselves contain ": " are kept whole. Returns the empty string for
# an empty value or when $line is undefined (record cut short by end of
# file).
sub field_value {
    my ($line) = @_;
    return '' unless defined $line;
    $line =~ s/\r?\n\z//;
    my ($value) = $line =~ /^\w+: ?(.*)$/s;
    return defined $value ? $value : '';
}

Wikka : BookmarkManagerImportScripts