#!/usr/bin/env python
# vim:fileencoding=utf-8

import HyperEstraier

import os

class HEGatherer:
	"""Register the files of a directory tree in a Hyper Estraier database.

	The database is opened in ``__init__`` with the ``DBWRITER | DBCREAT``
	flags and kept on ``self.db``.  Call ``close()`` once all documents
	have been registered so the writer handle is released.
	"""

	def __init__(self, dbpath):
		"""Open the database at ``dbpath`` with writer/create flags."""
		self.db = HyperEstraier.Database()
		self.db.open(dbpath, HyperEstraier.Database.DBWRITER | HyperEstraier.Database.DBCREAT)

	def close(self):
		"""Close the underlying database handle.

		NOTE(review): relies on the binding exposing ``Database.close()``
		as the native Hyper Estraier API does -- confirm for the installed
		binding version.
		"""
		self.db.close()

	def _put_doc(self, fname):
		"""Register the single file ``fname`` as one document.

		The path is echoed to stdout, then used for both the ``@uri``
		attribute (prefixed with ``file://``) and the ``@title`` attribute.
		The file contents are decoded as ISO-2022-JP (undecodable bytes
		are ignored) and handed to the document as UTF-8 bytes.
		"""
		print(fname)

		doc = HyperEstraier.Document()
		doc.add_attr('@uri', "file://" + fname)
		doc.add_attr('@title', fname)

		# Read raw bytes and close the handle right away: put_dir may call
		# this for every file of a large tree, so handles must not pile up.
		with open(fname, 'rb') as f:
			raw = f.read()

		text = unicode(raw, 'iso-2022-jp', 'ignore')
		doc.add_text(text.encode('utf-8'))

		self.db.put_doc(doc, HyperEstraier.Database.PDCLEAN)

	def put_dir(self, dirname):
		"""Walk ``dirname`` recursively and register every file found."""
		for root, dirs, files in os.walk(dirname):
			for fname in files:
				# os.path.join avoids a doubled separator when root
				# already ends with one (e.g. dirname given as "docs/").
				self._put_doc(os.path.join(root, fname))
if __name__ == '__main__':
	import sys

	# Command line: <script> dbname dirname
	if len(sys.argv) != 3:
		sys.stderr.write("%s dbname dirname\n" % sys.argv[0])
		# Non-zero status so callers can detect the usage error.
		sys.exit(1)

	dbname = sys.argv[1]
	dirname = sys.argv[2]

	heg = HEGatherer(dbname)
	try:
		heg.put_dir(dirname)
	finally:
		# Close the writer handle even if indexing fails part-way.
		# NOTE(review): assumes the binding provides Database.close()
		# like the native Hyper Estraier API -- confirm.
		heg.db.close()

