diff --git a/i2p2www/anonbib/BibTeX.py b/i2p2www/anonbib/BibTeX.py new file mode 100644 index 00000000..110e5ff3 --- /dev/null +++ b/i2p2www/anonbib/BibTeX.py @@ -0,0 +1,1268 @@ +#!/usr/bin/python2 +# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info. + +"""BibTeX.py -- parse and manipulate BibTeX files and entries. + + Based on perl code by Eddie Kohler; heavily modified. +""" + +import cStringIO +import re +import sys +import os + +import config + +import rank + +__all__ = [ 'ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize', + 'ParsedAuthor', 'FileIter', 'Parser', 'parseFile', + 'splitBibTeXEntriesBy', 'sortBibTexEntriesBy', ] + +# List: must map from month number to month name. +MONTHS = [ None, + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"] + +# Fields that we only care about for making web pages (BibTeX doesn't +# recognize them.) +WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks', + 'www_abstract_url', 'www_html_url', 'www_pdf_url', 'www_ps_url', + 'www_txt_url', 'www_ps_gz_url', 'www_amazon_url', + 'www_excerpt_url', 'www_publisher_url', + 'www_cache_section', 'www_tags' ] + +def url_untranslate(s): + """Change a BibTeX key into a string suitable for use in a URL.""" + s = re.sub(r'([%<>`#, &_\';])', + lambda m: "_%02x"%ord(m.group(1)), + s) + s = s.replace("/",":") + return s + +class ParseError(Exception): + """Raised on invalid BibTeX""" + pass + + +def smartJoin(*lst): + """Equivalent to os.path.join, but handle"." and ".." entries a bit better. + """ + lst = [ item for item in lst if item != "." ] + idx = 0 + while idx < len(lst): + if idx > 0 and lst[idx] == "..": + del lst[idx] + else: + idx += 1 + return os.path.join(*lst) + +class BibTeX: + """A parsed BibTeX file""" + def __init__(self): + self.entries = [] # List of BibTeXEntry + self.byKey = {} # Map from BibTeX key to BibTeX entry. 
+ def addEntry(self, ent): + """Add a BibTeX entry to this file.""" + k = ent.key + if self.byKey.get(ent.key.lower()): + print >> sys.stderr, "Already have an entry named %s"%k + return + self.entries.append(ent) + self.byKey[ent.key.lower()] = ent + def resolve(self): + """Validate all entries in this file, and resolve cross-references""" + seen = {} + for ent in self.entries: + seen.clear() + while ent.get('crossref'): + try: + cr = self.byKey[ent['crossref'].lower()] + except KeyError: + print "No such crossref: %s"% ent['crossref'] + break + if seen.get(cr.key): + raise ParseError("Circular crossref at %s" % ent.key) + seen[cr.key] = 1 + del ent.entries['crossref'] + + if cr.entryLine < ent.entryLine: + print "Warning: crossref %s used after declaration"%cr.key + + for k in cr.entries.keys(): + if ent.entries.has_key(k): + print "ERROR: %s defined both in %s and in %s"%( + k,ent.key,cr.key) + else: + ent.entries[k] = cr.entries[k] + + ent.resolve() + newEntries = [] + rk = config.REQUIRE_KEY + if rk is None: + HTTP/1.1 200 OK Connection: close Transfer-Encoding: chunked Cache-Control: max-age=0, private, must-revalidate, no-transform Set-Cookie: i_like_gitea=17bd49f34ab38567; Path=/; HttpOnly; Secure; SameSite=Lax Set-Cookie: _csrf=4ProXfo1B0GgRfKTeRMM8GITMwk6MTc1MzI2ODYwODEwOTc3MDM0Mw; Path=/; Max-Age=86400; HttpOnly; Secure; SameSite=Lax X-Frame-Options: SAMEORIGIN Date: Wed, 23 Jul 2025 11:03:28 GMT Content-Type: text/plain; charset=utf-8 X-Cache-Status: HIT X-Cache-Age: 0 8000 diff --git a/i2p2www/anonbib/BibTeX.py b/i2p2www/anonbib/BibTeX.py new file mode 100644 index 00000000..110e5ff3 --- /dev/null +++ b/i2p2www/anonbib/BibTeX.py @@ -0,0 +1,1268 @@ +#!/usr/bin/python2 +# Copyright 2003-2008, Nick Mathewson. See LICENSE for licensing info. + +"""BibTeX.py -- parse and manipulate BibTeX files and entries. + + Based on perl code by Eddie Kohler; heavily modified. 
+""" + +import cStringIO +import re +import sys +import os + +import config + +import rank + +__all__ = [ 'ParseError', 'BibTeX', 'BibTeXEntry', 'htmlize', + 'ParsedAuthor', 'FileIter', 'Parser', 'parseFile', + 'splitBibTeXEntriesBy', 'sortBibTexEntriesBy', ] + +# List: must map from month number to month name. +MONTHS = [ None, + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"] + +# Fields that we only care about for making web pages (BibTeX doesn't +# recognize them.) +WWW_FIELDS = [ 'www_section', 'www_important', 'www_remarks', + 'www_abstract_url', 'www_html_url', 'www_pdf_url', 'www_ps_url', + 'www_txt_url', 'www_ps_gz_url', 'www_amazon_url', + 'www_excerpt_url', 'www_publisher_url', + 'www_cache_section', 'www_tags' ] + +def url_untranslate(s): + """Change a BibTeX key into a string suitable for use in a URL.""" + s = re.sub(r'([%<>`#, &_\';])', + lambda m: "_%02x"%ord(m.group(1)), + s) + s = s.replace("/",":") + return s + +class ParseError(Exception): + """Raised on invalid BibTeX""" + pass + + +def smartJoin(*lst): + """Equivalent to os.path.join, but handle"." and ".." entries a bit better. + """ + lst = [ item for item in lst if item != "." ] + idx = 0 + while idx < len(lst): + if idx > 0 and lst[idx] == "..": + del lst[idx] + else: + idx += 1 + return os.path.join(*lst) + +class BibTeX: + """A parsed BibTeX file""" + def __init__(self): + self.entries = [] # List of BibTeXEntry + self.byKey = {} # Map from BibTeX key to BibTeX entry. 
+ def addEntry(self, ent): + """Add a BibTeX entry to this file.""" + k = ent.key + if self.byKey.get(ent.key.lower()): + print >> sys.stderr, "Already have an entry named %s"%k + return + self.entries.append(ent) + self.byKey[ent.key.lower()] = ent + def resolve(self): + """Validate all entries in this file, and resolve cross-references""" + seen = {} + for ent in self.entries: + seen.clear() + while ent.get('crossref'): + try: + cr = self.byKey[ent['crossref'].lower()] + except KeyError: + print "No such crossref: %s"% ent['crossref'] + break + if seen.get(cr.key): + raise ParseError("Circular crossref at %s" % ent.key) + seen[cr.key] = 1 + del ent.entries['crossref'] + + if cr.entryLine < ent.entryLine: + print "Warning: crossref %s used after declaration"%cr.key + + for k in cr.entries.keys(): + if ent.entries.has_key(k): + print "ERROR: %s defined both in %s and in %s"%( + k,ent.key,cr.key) + else: + ent.entries[k] = cr.entries[k] + + ent.resolve() + newEntries = [] + rk = config.REQUIRE_KEY + if rk is None: + 4000 for author in authors: + commas = 0 + fvl = [] + vl = [] + f = [] + v = [] + l = [] + j = [] + cur = fvl + for item in author: + if item == ',': + if commas == 0: + vl = fvl + fvl = [] + cur = f + else: + j.extend(f) + cur = f = [] + commas += 1 + else: + cur.append(item) + + if commas == 0: + split_von(f,v,l,fvl) + else: + f_tmp = [] + split_von(f_tmp,v,l,vl) + + parsedAuthors.append(ParsedAuthor(f,v,l,j)) + + return parsedAuthors + +ALLCHARS = "".join(map(chr,range(256))) +PRINTINGCHARS = "\t\n\r"+"".join(map(chr,range(32, 127))) +LC_CHARS = "abcdefghijklmnopqrstuvwxyz" +SV_DELCHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "@") +RE_ESCAPED = re.compile(r'\\.') +def split_von(f,v,l,x): + in_von = 0 + while x: + tt = t = x[0] + del x[0] + if tt[:2] == '{\\': + tt = tt.translate(ALLCHARS, SV_DELCHARS) + tt = RE_ESCAPED.sub("", tt) + tt = tt.translate(ALLCHARS, "{}") + if tt.translate(ALLCHARS, LC_CHARS) == "": + v.append(t) + 
in_von = 1 + elif in_von and f is not None: + l.append(t) + l.extend(x) + return + else: + f.append(t) + if not in_von: + l.append(f[-1]) + del f[-1] + + +class Parser: + """Parser class: reads BibTeX from a file and returns a BibTeX object.""" + ## Fields + # strings: maps entry string keys to their values. + # newStrings: all string definitions not in config.INITIAL_STRINGS + # invStrings: map from string values to their keys. + # fileiter: the line iterator we're parsing from. + # result: the BibTeX object that we're parsing into + # litStringLine: the line on which we started parsing a literal string; + # 0 for none. + # entryLine: the line on which the current entry started; 0 for none. + # + # curEntType: the type of the entry we're parsing now. (paper,article,etc) + def __init__(self, fileiter, initial_strings, result=None): + self.strings = config.INITIAL_STRINGS.copy() + self.strings.update(initial_strings) + self.newStrings = {} + self.invStrings = {} + for k,v in config.INITIAL_STRINGS.items(): + self.invStrings[v]=k + self.fileiter = fileiter + if result is None: + result = BibTeX() + self.result = result + self.litStringLine = 0 + self.entryLine = 0 + + def _parseKey(self, line): + it = self.fileiter + line = _advance(it,line) + m = KEY_RE.match(line) + if not m: + raise ParseError("Expected key at line %s"%self.fileiter.lineno) + key, line = m.groups() + return key, line + + def _parseValue(self, line): + it = self.fileiter + bracelevel = 0 + data = [] + while 1: + line = _advance(it,line) + line = line.strip() + assert line + + # Literal string? 
+ if line[0] == '"': + line=line[1:] + self.litStringLine = it.lineno + while 1: + if bracelevel: + m = BRACE_CLOSE_RE.match(line) + if m: + data.append(m.group(1)) + data.append('}') + line = m.group(2) + bracelevel -= 1 + continue + else: + m = STRING_CLOSE_RE.match(line) + if m: + data.append(m.group(1)) + line = m.group(2) + break + m = BRACE_OPEN_RE.match(line) + if m: + data.append(m.group(1)) + line = m.group(2) + bracelevel += 1 + continue + data.append(line) + data.append(" ") + line = it.next() + self.litStringLine = 0 + elif line[0] == '{': + bracelevel += 1 + line = line[1:] + while bracelevel: + m = BRACE_CLOSE_RE.match(line) + if m: + #print bracelevel, "A", repr(m.group(1)) + data.append(m.group(1)) + bracelevel -= 1 + if bracelevel > 0: + #print bracelevel, "- '}'" + data.append('}') + line = m.group(2) + continue + m = BRACE_OPEN_RE.match(line) + if m: + bracelevel += 1 + #print bracelevel, "B", repr(m.group(1)) + data.append(m.group(1)) + line = m.group(2) + continue + else: + #print bracelevel, "C", repr(line) + data.append(line) + data.append(" ") + line = it.next() + elif line[0] == '#': + print >>sys.stderr, "Weird concat on line %s"%it.lineno + elif line[0] in "},": + if not data: + print >>sys.stderr, "No data after field on line %s"%( + it.lineno) + else: + m = RAW_DATA_RE.match(line) + if m: + s = self.strings.get(m.group(1).lower()) + if s is not None: + data.append(s) + else: + data.append(m.group(1)) + line = m.group(2) + else: + raise ParseError("Questionable line at line %s"%it.lineno) + + # Got a string, check for concatenation. 
+ if line.isspace() or not line: + data.append(" ") + line = _advance(it,line) + line = line.strip() + assert line + if line[0] == '#': + line = line[1:] + else: + data = "".join(data) + data = re.sub(r'\s+', ' ', data) + data = re.sub(r'^\s+', '', data) + data = re.sub(r'\s+$', '', data) + return data, line + + def _parseEntry(self, line): #name, strings, entries + it = self.fileiter + self.entryLine = it.lineno + line = _advance(it,line) + + m = BRACE_BEGIN_RE.match(line) + if not m: + raise ParseError("Expected an opening brace at line %s"%it.lineno) + line = m.group(1) + + proto = { 'string' : 'p', + 'preamble' : 'v', + }.get(self.curEntType, 'kp*') + + v = [] + while 1: + line = _advance(it,line) + + m = BRACE_END_RE.match(line) + if m: + line = m.group(1) + break + if not proto: + raise ParseError("Overlong entry starting on line %s" + % self.entryLine) + elif proto[0] == 'k': + key, line = self._parseKey(line) + v.append(key) + elif proto[0] == 'v': + value, line = self._parseValue(line) + v.append(value) + elif proto[0] == 'p': + key, line = self._parseKey(line) + v.append(key) + line = _advance(it,line) + line = line.lstrip() + if line[0] == '=': + line = line[1:] + value, line = self._parseValue(line) + v.append(value) + else: + assert 0 + line = line.strip() + if line and line[0] == ',': + line = line[1:] + if proto and proto[1:] != '*': + proto = proto[1:] + if proto and proto[1:] != '*': + raise ParseError("Missing arguments to %s on line %s" % ( + self.curEntType, self.entryLine)) + + if self.curEntType == 'string': + self.strings[v[0]] = v[1] + self.newStrings[v[0]] = v[1] + self.invStrings[v[1]] = v[0] + elif self.curEntType == 'preamble': + pass + else: + key = v[0] + d = {} + for i in xrange(1,len(v),2): + d[v[i].lower()] = v[i+1] + ent = BibTeXEntry(self.curEntType, key, d) + ent.entryLine = self.entryLine + self.result.addEntry(ent) + + return line + + def parse(self): + try: + self._parse() + except StopIteration: + if self.litStringLine: + 
raise ParseError("Unexpected EOF in string (started on %s)" % + self.litStringLine) + elif self.entryLine: + raise ParseError("Unexpected EOF at line %s (entry started " + "on %s)" % (self.fileiter.lineno, + self.entryLine)) + + self.result.invStrings = self.invStrings + self.result.newStrings = self.newStrings + + return self.result + + def _parse(self): + it = self.fileiter + line = it.next() + while 1: + # Skip blank lines. + while not line or line.isspace() or OUTER_COMMENT_RE.match(line): + line = it.next() + # Get the first line of an entry. + m = ENTRY_BEGIN_RE.match(line) + if m: + self.curEntType = m.group(1).lower() + line = m.group(2) + line = self._parseEntry(line) + self.entryLine = 0 + else: + raise ParseError("Bad input at line %s (expected a new entry.)" + % it.lineno) + +def _advance(it,line): + while not line or line.isspace() or COMMENT_RE.match(line): + line = it.next() + return line + +# Matches a comment line outside of an entry. +OUTER_COMMENT_RE = re.compile(r'^\s*[\#\%]') +# Matches a comment line inside of an entry. +COMMENT_RE = re.compile(r'^\s*\%') +# Matches the start of an entry. group 1 is the type of the entry. +# group 2 is the rest of the line. +ENTRY_BEGIN_RE = re.compile(r'''^\s*\@([^\s\"\%\'\(\)\,\=\{\}]+)(.*)''') +# Start of an entry. group 1 is the keyword naming the entry. 
+BRACE_BEGIN_RE = re.compile(r'\s*\{(.*)') +BRACE_END_RE = re.compile(r'\s*\}(.*)') +KEY_RE = re.compile(r'''\s*([^\"\#\%\'\(\)\,\=\{\}\s]+)(.*)''') + +STRING_CLOSE_RE = re.compile(r'^([^\{\}\"]*)\"(.*)') +BRACE_CLOSE_RE = re.compile(r'^([^\{\}]*)\}(.*)') +BRACE_OPEN_RE = re.compile(r'^([^\{\}]*\{)(.*)') +RAW_DATA_RE = re.compile(r'^([^\s\},]+)(.*)') + +def parseFile(filename, result=None): + """Helper function: parse a single BibTeX file""" + f = FileIter(fname=filename) + p = Parser(f, {}, result) + r = p.parse() + r.resolve() + for e in r.entries: + e.check() + return r + +def parseString(string, result=None): + """Helper function: parse BibTeX from a string""" + f = FileIter(string=string) + p = Parser(f, {}, result) + r = p.parse() + r.resolve() + for e in r.entries: + e.check() + return r + +if __name__ == '__main__': + if len(sys.argv)>1: + fname=sys.argv[1] + else: + fname="testbib/pdos.bib" + + r = parseFile(fname) + + for e in r.entries: + if e.type in ("proceedings", "journal"): continue + print e.to_html() + diff --git a/i2p2www/anonbib/Makefile b/i2p2www/anonbib/Makefile new file mode 100644 index 00000000..90cb8e16 --- /dev/null +++ b/i2p2www/anonbib/Makefile @@ -0,0 +1,39 @@ +PYTHON=python +VERSION=0.3-dev + +all: + $(PYTHON) writeHTML.py anonbib.cfg + +clean: + rm -f *~ */*~ *.pyc *.pyo + +update: + $(PYTHON) updateCache.py anonbib.cfg + $(PYTHON) rank.py anonbib.cfg + +suggest: + $(PYTHON) rank.py suggest anonbib.cfg + +test: + $(PYTHON) test.py + +veryclean: clean + rm -f author.html date.html topic.html bibtex.html tmp.bib + +TEMPLATES=_template_.html _template_bibtex.html +CSS=css/main.css css/pubs.css +BIBTEX=anonbib.bib +SOURCE=BibTeX.py config.py metaphone.py reconcile.py updateCache.py \ + writeHTML.py rank.py tests.py +EXTRAS=TODO README Makefile ChangeLog anonbib.cfg gold.gif silver.gif \ + upb.gif ups.gif + +DISTFILES=$(TEMPLATES) $(CSS) $(BIBTEX) $(SOURCE) $(EXTRAS) + +dist: clean + rm -rf anonbib-$(VERSION) + mkdir anonbib-$(VERSION) + 
tar cf - $(DISTFILES) | (cd anonbib-$(VERSION); tar xf -) + mkdir anonbib-$(VERSION)/cache + tar czf anonbib-$(VERSION).tar.gz anonbib-$(VERSION) + rm -rf anonbib-$(VERSION) diff --git a/i2p2www/anonbib/README b/i2p2www/anonbib/README new file mode 100644 index 00000000..b15ea993 --- /dev/null +++ b/i2p2www/anonbib/README @@ -0,0 +1,52 @@ +anonbib 0.3 -- Code to generate the anonymity bibliography + +Copyright (c) 2003-2008 Nick Mathewson +Based on 'PDOSBib' perl code by Eddie Kohler + +This software is licensed under the GNU GPL, version 2 or later. + +To use this software, you need to understand BibTeX and Python a +little. If it breaks, you get to keep both pieces. You will need +Python 2.2 or later. + +To use this package: + - Get a good BibTeX file. You may want to mark it up with some of the + extra keys used in our "anonbib.bib" file. All of the additional + Bibtex keys we use have the prefix "www_"; check out anonbib.bib + for their usage. + + - Edit anonbib.cfg and _template_.html and _template_bibtex.html so they + refer to your files, authors, topics, and so on. + + - Run 'python updateCache.py anonbib.cfg' to create a local cache of the + papers in your bibliography based on their www_*_url entries. (By + default, the script will ignore any entries you have already cached. To + force a fresh download of a cached file, delete it.) + + - Run 'python rank.py anonbib.cfg' to download Google Scholar rankings of + all the papers. + + - Run 'python writeHTML.py anonbib.cfg'. Fix any errors you care about. + + - Re-run these scripts when you change the bibliography. + + - If you want to merge in big BibTeX files, try using the reconcile.py + script. See the comment at the start of the file for usage info. + + +New in 0.3: + - Support for Google Scholar rankings to denote hot/rising papers. + Implemented by George Danezis. + - Make reconcile script generate more useful output. + - Add support for multiple bibliographies generated from a single bibtex + source. 
This is done via 'tags' on bibtex entries. If an entry is + tagged, it appears in the corresponding bibliographies. This is good + for generating a master bibliography and one or more selected readings + lists from the same source. + - Handle more errors when downloading files. + - When fetching a paper with a .ps url, generate the .ps.gz file + automatically. + - Note an error when a crossref overrides an existing field in an entry. + - Handle the Proceedings type correctly. + - Enforce proper encoding on pages: it must be number--number. + - diff --git a/i2p2www/anonbib/TODO b/i2p2www/anonbib/TODO new file mode 100644 index 00000000..c208fc2e --- /dev/null +++ b/i2p2www/anonbib/TODO @@ -0,0 +1,33 @@ + + +- More general tasks + . Know about @book + . Write unit tests for everything + . Make name parsing vaguely sane + - Maybe uncrossref in tmp.bib + - Maybe pull important papers to the start of their sections? + . Clean \{}~ when going from note to url; add \{}~ when making + note from url. + . Also clean \_ to _ and back + - Look for urls in wherepublished. + . Forgive newlines in wherepublished, note. + - When sorting by date, entries with unknown months go into a magic + "month zero" before January. Is this right? + - Strip unused features. + o Take a configuration file on the command line instead of just + importing config.py. + +- Cache tasks + - Generate a list of broken links + - Re-download all cached items if requested + - Clear dead items from cache + - Use HTTP HEAD requests to decide whether to update stale + elements in cache. + - Add ability to honor a "www_no_cache={1}" option for entries + if the authors ask us not to cache them. + - Maybe, add ability to cache images from an HTML page. + +- Reconcile tasks + - Document it. + - Notice when there is new or different information of certain kinds + (pages, dates, etc) in the new information.
diff --git a/i2p2www/anonbib/_template_.html b/i2p2www/anonbib/_template_.html new file mode 100644 index 00000000..b0eb5619 --- /dev/null +++ b/i2p2www/anonbib/_template_.html @@ -0,0 +1,74 @@ + + + +
+ + + + + + +%(otherbibs)s
+%(choices)s
+ + +
+
+
+
|
+
+
+
+Publications by %(field)s+ +
|
+
+
Please send new or corrected entries to
+
+<anonbib@freehaven.net>.
+
+If you can, please format them as BibTeX; see our
+BibTeX source page for examples.
+Remember to include URLs if possible:
+offline papers are
+less useful.
+
The source code to anonbib is now in Git. See the anonbib gitweb page for more information. +
+ + + diff --git a/i2p2www/anonbib/_template_bibtex.html b/i2p2www/anonbib/_template_bibtex.html new file mode 100644 index 00000000..88f6f94b --- /dev/null +++ b/i2p2www/anonbib/_template_bibtex.html @@ -0,0 +1,28 @@ + + + + + + + + + +