diff --git a/cmake/Install.cmake b/cmake/Install.cmake index f7d894e95..bb5543a61 100644 --- a/cmake/Install.cmake +++ b/cmake/Install.cmake @@ -136,7 +136,7 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/user_doc/man/man1/ PATTERN "*.1" PATTERN ${CONDEMNED_PAGE} EXCLUDE) -install(PROGRAMS share/tools/create_manpage_completions.py share/tools/deroff.py +install(PROGRAMS share/tools/create_manpage_completions.py DESTINATION ${rel_datadir}/fish/tools/) install(DIRECTORY share/tools/web_config diff --git a/share/functions/fish_update_completions.fish b/share/functions/fish_update_completions.fish index e44936e05..bb819e1cc 100644 --- a/share/functions/fish_update_completions.fish +++ b/share/functions/fish_update_completions.fish @@ -23,7 +23,7 @@ function fish_update_completions --description "Update man-page based completion $python $update_args end __fish_data_with_directory tools \ - 'create_manpage_completions\.py|deroff\.py' \ + 'create_manpage_completions\.py' \ __fish_update_completions $argv __fish_with_status functions --erase __fish_update_completions end diff --git a/share/tools/create_manpage_completions.py b/share/tools/create_manpage_completions.py index 5cda02355..726bec9a1 100755 --- a/share/tools/create_manpage_completions.py +++ b/share/tools/create_manpage_completions.py @@ -18,7 +18,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND """ from __future__ import print_function -from deroff import Deroffer import argparse import bz2 import errno @@ -47,6 +46,1120 @@ except ImportError: # Whether we're Python 3 IS_PY3 = sys.version_info[0] >= 3 + +class Deroffer: + g_specs_specletter = { + # Output composed latin1 letters + "-D": "\320", + "Sd": "\360", + "Tp": "\376", + "TP": "\336", + "AE": "\306", + "ae": "\346", + "OE": "OE", + "oe": "oe", + ":a": "\344", + ":A": "\304", + ":e": "\353", + ":E": "\313", + ":i": "\357", + ":I": "\317", + ":o": "\366", + ":O": "\326", + ":u": "\374", + ":U": "\334", + ":y": "\377", + "ss": "\337", + "'A": "\301", + "'E": "\311", + "'I": "\315", + "'O": "\323", + "'U": "\332", + "'Y": "\335", + "'a": "\341", + "'e": "\351", + "'i": "\355", + "'o": "\363", + "'u": "\372", + "'y": "\375", + "^A": "\302", + "^E": "\312", + "^I": "\316", + "^O": "\324", + "^U": "\333", + "^a": "\342", + "^e": "\352", + "^i": "\356", + "^o": "\364", + "^u": "\373", + "`A": "\300", + "`E": "\310", + "`I": "\314", + "`O": "\322", + "`U": "\331", + "`a": "\340", + "`e": "\350", + "`i": "\354", + "`o": "\362", + "`u": "\371", + "~A": "\303", + "~N": "\321", + "~O": "\325", + "~a": "\343", + "~n": "\361", + "~o": "\365", + ",C": "\307", + ",c": "\347", + "/l": "/l", + "/L": "/L", + "/o": "\370", + "/O": "\330", + "oA": "\305", + "oa": "\345", + # Ligatures + "fi": "fi", + "ff": "ff", + "fl": "fl", + "Fi": "ffi", + "Ff": "fff", + "Fl": "ffl", + } + + g_specs = { + "mi": "-", + "en": "-", + "hy": "-", + "em": "--", + "lq": "“", + "rq": "”", + "Bq": ",,", + "oq": "`", + "cq": "'", + "aq": "'", + "dq": '"', + "or": "|", + "at": "@", + "sh": "#", + "Eu": "\244", + "eu": "\244", + "Do": "$", + "ct": "\242", + "Fo": "\253", + "Fc": "\273", + "fo": "<", + "fc": ">", + "r!": "\241", + "r?": "\277", + "Of": "\252", + "Om": "\272", + "pc": "\267", + "S1": "\271", + "S2": "\262", + "S3": "\263", + "<-": "<-", + "->": "->", + "<>": "<->", + "ua": "^", + "da": "v", + "lA": "<=", + "rA": "=>", + "hA": "<=>", + "uA": "^^", + "dA": "vv", + "ba": "|", + "bb": "|", + "br": "|", + "bv": "|", + "ru": "_", + "ul": "_", + "ci": "O", + "bu": "o", + "co": "\251", + "rg": "\256", + "tm": "(TM)", + "dd": "||", + "dg": "|", + "ps": "\266", + "sc": "\247", + "de": "\260", + "%0": "0/00", + "14": "\274", + "12": "\275", + "34": "\276", + "f/": "/", + "sl": "/", + "rs": "\\", + "sq": "[]", + "fm": "'", + "ha": "^", + "ti": "~", + "lB": "[", + "rB": "]", + "lC": "{", + "rC": "}", + "la": "<", + "ra": ">", + "lh": "<=", + "rh": "=>", + "tf": "therefore", + "~~": "~~", + "~=": "~=", + "!=": "!=", + "**": "*", + "+-": "\261", + "<=": "<=", + "==": "==", + "=~": "=~", + ">=": ">=", + "AN": "\\/", + "OR": "/\\", + "no": "\254", + "te": "there exists", + "fa": "for all", + "Ah": "aleph", + "Im": "imaginary", + "Re": "real", + "if": "infinity", + "md": "\267", + "mo": "member of", + "mu": "\327", + "nm": "not member of", + "pl": "+", + "eq": "=", + "pt": "oc", + "pp": "perpendicular", + "sb": "(=", + "sp": "=)", + "ib": "(-", + "ip": "-)", + "ap": "~", + "is": "I", + "sr": "root", + "pd": "d", + "c*": "(x)", + "c+": "(+)", + "ca": "cap", + "cu": "U", + "di": "\367", + "gr": "V", + "es": "{}", + "CR": "_|", + "st": "such that", + "/_": "/_", + "lz": "<>", + "an": "-", + # Output Greek + "*A": "Alpha", + "*B": "Beta", + "*C": "Xi", + "*D": "Delta", + "*E": "Epsilon", + "*F": "Phi", + "*G": "Gamma", + "*H": "Theta", + "*I": "Iota", + "*K": "Kappa", + "*L": "Lambda", + "*M": "Mu", + "*N": "Nu", + "*O": "Omicron", + "*P": "Pi", + "*Q": "Psi", + "*R": "Rho", + "*S": "Sigma", + "*T": "Tau", + "*U": "Upsilon", + "*W": "Omega", + "*X": "Chi", + "*Y": "Eta", + "*Z": "Zeta", + "*a": "alpha", + "*b": "beta", + "*c": "xi", + "*d": "delta", + "*e": "epsilon", + "*f": "phi", + "+f": "phi", + "*g": "gamma", + "*h": "theta", + "+h": "theta", + "*i": "iota", + "*k": "kappa", + "*l": "lambda", + "*m": "\265", + "*n": "nu", + "*o": "omicron", + "*p": "pi", + "+p": "omega", + "*q": "psi", + "*r": "rho", + "*s": "sigma", + "*t": "tau", + "*u": "upsilon", + "*w": "omega", + "*x": "chi", + "*y": "eta", + "*z": "zeta", + "ts": "sigma", + } + + g_re_word = re.compile(r"[a-zA-Z_]+") # equivalent to the word() method + g_re_number = re.compile(r"[+-]?\d+") # equivalent to the number() method + g_re_esc_char = re.compile( + r"""([a-zA-Z_]) | # Word + ([+-]?\d) | # Number + \\ # Backslash (for escape seq) + """, + re.VERBOSE, + ) + + g_re_not_backslash_or_whitespace = re.compile( + r"[^ \t\n\r\f\v\\]+" + ) # Match a sequence of not backslash or whitespace + + g_re_newline_collapse = re.compile(r"\n{3,}") + + g_re_font = re.compile( + r"""\\f( # Starts with backslash f + (\(\S{2}) | # Open paren, then two printable chars + (\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket + \S) # Any printable character + """, + re.VERBOSE, + ) + + # This gets filled in in __init__ below + g_macro_dict = False + + def __init__(self): + self.reg_table = {} + self.tr_from = "" + self.tr_to = "" + self.tr = "" + self.nls = 2 + self.specletter = False + self.refer = False + self.macro = 0 + self.nobody = False + self.inlist = False + self.inheader = False + self.pic = False + self.tbl = False + self.tblstate = 0 + self.tblTab = "" + self.eqn = False + self.skipheaders = False + self.skiplists = False + self.ignore_sonx = False + self.output = [] + self.name = "" + + self.OPTIONS = 0 + self.FORMAT = 1 + self.DATA = 2 + + # words is uninteresting and should be treated as false + + if not Deroffer.g_macro_dict: + Deroffer.g_macro_dict = { + "SH": Deroffer.macro_sh, + "SS": Deroffer.macro_ss_ip, + "IP": Deroffer.macro_ss_ip, + "H ": Deroffer.macro_ss_ip, + "I ": Deroffer.macro_i_ir, + "IR": Deroffer.macro_i_ir, + "IB": Deroffer.macro_i_ir, + "B ": Deroffer.macro_i_ir, + "BR": Deroffer.macro_i_ir, + "BI": Deroffer.macro_i_ir, + "R ": Deroffer.macro_i_ir, + "RB": Deroffer.macro_i_ir, + "RI": Deroffer.macro_i_ir, + "AB": Deroffer.macro_i_ir, + "Nm": Deroffer.macro_Nm, + "] ": Deroffer.macro_close_bracket, + "PS": Deroffer.macro_ps, + "PE": Deroffer.macro_pe, + "TS": Deroffer.macro_ts, + "T&": Deroffer.macro_t_and, + "TE": Deroffer.macro_te, + "EQ": Deroffer.macro_eq, + "EN": Deroffer.macro_en, + "R1": Deroffer.macro_r1, + "R2": Deroffer.macro_r2, + "de": Deroffer.macro_de, + "BL": Deroffer.macro_bl_vl, + "VL": Deroffer.macro_bl_vl, + "AL": Deroffer.macro_bl_vl, + "LB": Deroffer.macro_bl_vl, + "RL": Deroffer.macro_bl_vl, + "ML": Deroffer.macro_bl_vl, + "DL": Deroffer.macro_bl_vl, + "BV": Deroffer.macro_bv, + "LE": Deroffer.macro_le, + "LP": Deroffer.macro_lp_pp, + "PP": Deroffer.macro_lp_pp, + "P\n": Deroffer.macro_lp_pp, + "ds": Deroffer.macro_ds, + "so": Deroffer.macro_so_nx, + "nx": Deroffer.macro_so_nx, + "tr": Deroffer.macro_tr, + "sp": Deroffer.macro_sp, + } + + def flush_output(self, where): + if where: + where.write(self.get_output()) + self.output[:] = [] + + def get_output(self): + res = "".join(self.output) + clean_res = Deroffer.g_re_newline_collapse.sub("\n", res) + return clean_res + + def putchar(self, c): + self.output.append(c) + return c + + # This gets swapped in in place of condputs the first time tr gets modified + def condputs_tr(self, str): + special = ( + self.pic + or self.eqn + or self.refer + or self.macro + or (self.skiplists and self.inlist) + or (self.skipheaders and self.inheader) + ) + if not special: + self.output.append(str.translate(self.tr)) + + def condputs(self, str): + special = ( + self.pic + or self.eqn + or self.refer + or self.macro + or (self.skiplists and self.inlist) + or (self.skipheaders and self.inheader) + ) + if not special: + self.output.append(str) + + def str_at(self, idx): + return self.s[idx : idx + 1] + + def skip_char(self, amt=1): + self.s = self.s[amt:] + + def skip_leading_whitespace(self): + self.s = self.s.lstrip() + + def is_white(self, idx): + # Note this returns false for empty strings (idx >= len(self.s)) + return self.s[idx : idx + 1].isspace() + + def str_eq(offset, other, len): + return self.s[offset : offset + len] == other[:len] + + def prch(self, idx): + # Note that this return False for the empty string (idx >= len(self.s)) + ch = self.s[idx : idx + 1] + return ch not in " \t\n" + + def font(self): + match = Deroffer.g_re_font.match(self.s) + if not match: + return False + self.skip_char(match.end()) + return True + + def font2(self): + if self.s[0:2] == "\\f": + c = self.str_at(2) + if c == "(" and self.prch(3) and self.prch(4): + self.skip_char(5) + return True + elif c == "[": + self.skip_char(2) + while self.prch(0) and self.str_at(0) != "]": + self.skip_char() + if self.str_at(0) == "]": + self.skip_char() + elif self.prch(2): + self.skip_char(3) + return True + return False + + def comment(self): + # Here we require that the string start with \" + while self.str_at(0) and self.str_at(0) != "\n": + self.skip_char() + return True + + def numreq(self): + # We require that the string starts with backslash + if self.str_at(1) in "hvwud" and self.str_at(2) == "'": + self.macro += 1 + self.skip_char(3) + while self.str_at(0) != "'" and self.esc_char(): + pass # Weird + if self.str_at(0) == "'": + self.skip_char() + self.macro -= 1 + return True + return False + + def var(self): + reg = "" + s0s1 = self.s[0:2] + if s0s1 == "\\n": + if self.s[3:5] == "dy": + self.skip_char(5) + return True + elif self.str_at(2) == "(" and self.prch(3) and self.prch(4): + self.skip_char(5) + return True + elif self.str_at(2) == "[" and self.prch(3): + self.skip_char(3) + while self.str_at(0) and self.str_at(0) != "]": + self.skip_char() + return True + elif self.prch(2): + self.skip_char(3) + return True + elif s0s1 == "\\*": + if self.str_at(2) == "(" and self.prch(3) and self.prch(4): + reg = self.s[3:5] + self.skip_char(5) + elif self.str_at(2) == "[" and self.prch(3): + self.skip_char(3) + while self.str_at(0) and self.str_at(0) != "]": + reg = reg + self.str_at(0) + self.skip_char() + if self.s[0:1] == "]": + self.skip_char() + else: + return False + elif self.prch(2): + reg = self.str_at(2) + self.skip_char(3) + else: + return False + + if reg in self.reg_table: + old_s = self.s + self.s = self.reg_table[reg] + self.text_arg() + return True + return False + + def size(self): + # We require that the string starts with \s + if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)): + self.skip_char(3) + while self.digit(0): + self.skip_char() + return True + return False + + def spec(self): + self.specletter = False + if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3): + key = self.s[2:4] + if key in Deroffer.g_specs_specletter: + self.condputs(Deroffer.g_specs_specletter[key]) + self.specletter = True + elif key in Deroffer.g_specs: + self.condputs(Deroffer.g_specs[key]) + self.skip_char(4) + return True + elif self.s.startswith("\\%"): + self.specletter = True + self.skip_char(2) + return True + else: + return False + + def esc(self): + # We require that the string start with backslash + c = self.s[1:2] + if not c: + return False + if c in "eE": + self.condputs("\\") + elif c in "t": + self.condputs("\t") + elif c in "0~": + self.condputs(" ") + elif c in "|^&:": + pass + else: + self.condputs(c) + self.skip_char(2) + return True + + def word(self): + got_something = False + while True: + match = Deroffer.g_re_word.match(self.s) + if not match: + break + got_something = True + self.condputs(match.group(0)) + self.skip_char(match.end(0)) + + # Consume all specials + while self.spec(): + if not self.specletter: + break + + return got_something + + def text(self): + while True: + idx = self.s.find("\\") + if idx == -1: + self.condputs(self.s) + self.s = "" + break + else: + self.condputs(self.s[:idx]) + self.skip_char(idx) + if not self.esc_char_backslash(): + self.condputs(self.str_at(0)) + self.skip_char() + return True + + def letter(self, idx): + ch = self.str_at(idx) + return ch.isalpha() or ch == "_" # underscore is used in C identifiers + + def digit(self, idx): + ch = self.str_at(idx) + return ch.isdigit() + + def number(self): + match = Deroffer.g_re_number.match(self.s) + if not match: + return False + else: + self.condputs(match.group(0)) + self.skip_char(match.end()) + return True + + def esc_char_backslash(self): + # Like esc_char, but we know the string starts with a backslash + c = self.s[1:2] + if c == '"': + return self.comment() + elif c == "f": + return self.font() + elif c == "s": + return self.size() + elif c in "hvwud": + return self.numreq() + elif c in "n*": + return self.var() + elif c == "(": + return self.spec() + else: + return self.esc() + + def esc_char(self): + if self.s[0:1] == "\\": + return self.esc_char_backslash() + return self.word() or self.number() + + def quoted_arg(self): + if self.str_at(0) == '"': + self.skip_char() + while self.s and self.str_at(0) != '"': + if not self.esc_char(): + if self.s: + self.condputs(self.str_at(0)) + self.skip_char() + return True + else: + return False + + def text_arg(self): + # PCA: The deroff.c textArg() disallowed quotes at the start of an argument + # I'm not sure if this was a bug or not + got_something = False + while True: + match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s) + if match: + # Output the characters in the match + self.condputs(match.group(0)) + self.skip_char(match.end(0)) + got_something = True + + # Next is either an escape, or whitespace, or the end + # If it's the whitespace or the end, we're done + if not self.s or self.is_white(0): + return got_something + + # Try an escape + if not self.esc_char(): + # Some busted escape? Just output it + self.condputs(self.str_at(0)) + self.skip_char() + got_something = True + + def text_arg2(self): + if not self.esc_char(): + if self.s and not self.is_white(0): + self.condputs(self.str_at(0)) + self.skip_char() + else: + return False + while True: + if not self.esc_char(): + if self.s and not self.is_white(0): + self.condputs(self.str_at(0)) + self.skip_char() + else: + return True + + # Macro functions + def macro_sh(self): + for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']: + if self.s[2:].startswith(header_str): + self.inheader = True + break + else: + # Did not find a header string + self.inheader = False + self.nobody = True + + def macro_ss_ip(self): + self.nobody = True + return False + + def macro_i_ir(self): + return False + + def macro_Nm(self): + if self.s == "Nm\n": + self.condputs(self.name) + else: + self.name = self.s[3:].strip() + " " + return True + + def macro_close_bracket(self): + self.refer = False + return False + + def macro_ps(self): + if self.is_white(2): + self.pic = True + self.condputs("\n") + return True + + def macro_pe(self): + if self.is_white(2): + self.pic = False + self.condputs("\n") + return True + + def macro_ts(self): + if self.is_white(2): + self.tbl, self.tblstate = True, self.OPTIONS + self.condputs("\n") + return True + + def macro_t_and(self): + if self.is_white(2): + self.tbl, self.tblstate = True, self.FORMAT + self.condputs("\n") + return True + + def macro_te(self): + if self.is_white(2): + self.tbl = False + self.condputs("\n") + return True + + def macro_eq(self): + if self.is_white(2): + self.eqn = True + self.condputs("\n") + return True + + def macro_en(self): + if self.is_white(2): + self.eqn = False + self.condputs("\n") + return True + + def macro_r1(self): + if self.is_white(2): + self.refer2 = True + self.condputs("\n") + return True + + def macro_r2(self): + if self.is_white(2): + self.refer2 = False + self.condputs("\n") + return True + + def macro_de(self): + macro = True + self.condputs("\n") + return True + + def macro_bl_vl(self): + if self.is_white(2): + self.inlist = True + self.condputs("\n") + return True + + def macro_bv(self): + if self.str_at(2) == "L" and self.white(self.str_at(3)): + self.inlist = True + self.condputs("\n") + return True + + def macro_le(self): + if self.is_white(2): + self.inlist = False + self.condputs("\n") + return True + + def macro_lp_pp(self): + self.condputs("\n") + return True + + def macro_ds(self): + self.skip_char(2) + self.skip_leading_whitespace() + if self.str_at(0): + # Split at whitespace + comps = self.s.split(None, 2) + if len(comps) == 2: + name, value = comps + value = value.rstrip() + self.reg_table[name] = value + self.condputs("\n") + return True + + def macro_so_nx(self): + # We always ignore include directives + # deroff.c for some reason allowed this to fall through to the 'tr' case + # I think that was just a bug so I won't replicate it + return True + + def macro_tr(self): + self.skip_char(2) + self.skip_leading_whitespace() + while self.s and self.str_at(0) != "\n": + c = self.str_at(0) + ns = self.str_at(1) + self.skip_char(2) + if not ns or ns == "\n": + ns = " " + self.tr_from += c + self.tr_to += ns + + # Update our table, then swap in the slower tr-savvy condputs + try: # Python2 + self.tr = string.maketrans(self.tr_from, self.tr_to) + except AttributeError: # Python3 + self.tr = "".maketrans(self.tr_from, self.tr_to) + self.condputs = self.condputs_tr + return True + + def macro_sp(self): + self.condputs("\n") + return True + + def macro_other(self): + self.condputs("\n") + return True + + def request_or_macro(self): + # s[0] is period or open single quote + self.skip_char() + s0 = self.s[1:2] + if s0 == "\\": + if self.str_at(1) == '"': + self.condputs("\n") + return True + else: + pass + elif s0 == "[": + self.refer = True + self.condputs("\n") + return True + elif s0 == "]": + self.refer = False + self.skip_char() + return self.text() + elif s0 == ".": + self.macro = False + self.condputs("\n") + return True + + self.nobody = False + s0s1 = self.s[0:2] + + macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other) + if macro_func(self): + return True + + if self.skipheaders and self.nobody: + return True + + self.skip_leading_whitespace() + while self.s and not self.is_white(0): + self.skip_char() + self.skip_leading_whitespace() + while True: + if not self.quoted_arg() and not self.text_arg(): + if self.s: + self.condputs(self.str_at(0)) + self.skip_char() + else: + return True + + def request_or_macro2(self): + self.skip_char() + s0 = self.s[0:1] + if s0 == "\\": + if self.str_at(1) == '"': + self.condputs("\n") + return True + else: + pass + elif s0 == "[": + self.refer = True + self.condputs("\n") + return True + elif s0 == "]": + self.refer = False + self.skip_char() + return self.text() + elif s0 == ".": + self.macro = False + self.condputs("\n") + return True + + self.nobody = False + s0s1 = self.s[0:2] + if s0s1 == "SH": + for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']: + if self.s[2:].startswith(header_str): + self.inheader = True + break + else: + # Did not find a header string + self.inheader = False + self.nobody = True + elif s0s1 in ["SS", "IP", "H "]: + self.nobody = True + elif s0s1 in ["I ", "IR", "IB", "B ", "BR", "BI", "R ", "RB", "RI", "AB"]: + pass + elif s0s1 in ["] "]: + self.refer = False + elif s0s1 in ["PS"]: + if self.is_white(2): + self.pic = True + self.condputs("\n") + return True + elif s0s1 in ["PE"]: + if self.is_white(2): + self.pic = False + self.condputs("\n") + return True + elif s0s1 in ["TS"]: + if self.is_white(2): + self.tbl, self.tblstate = True, self.OPTIONS + self.condputs("\n") + return True + elif s0s1 in ["T&"]: + if self.is_white(2): + self.tbl, self.tblstate = True, self.FORMAT + self.condputs("\n") + return True + elif s0s1 in ["TE"]: + if self.is_white(2): + self.tbl = False + self.condputs("\n") + return True + elif s0s1 in ["EQ"]: + if self.is_white(2): + self.eqn = True + self.condputs("\n") + return True + elif s0s1 in ["EN"]: + if self.is_white(2): + self.eqn = False + self.condputs("\n") + return True + elif s0s1 in ["R1"]: + if self.is_white(2): + self.refer2 = True + self.condputs("\n") + return True + elif s0s1 in ["R2"]: + if self.is_white(2): + self.refer2 = False + self.condputs("\n") + return True + elif s0s1 in ["de"]: + macro = True + self.condputs("\n") + return True + elif s0s1 in ["BL", "VL", "AL", "LB", "RL", "ML", "DL"]: + if self.is_white(2): + self.inlist = True + self.condputs("\n") + return True + elif s0s1 in ["BV"]: + if self.str_at(2) == "L" and self.white(self.str_at(3)): + self.inlist = True + self.condputs("\n") + return True + elif s0s1 in ["LE"]: + if self.is_white(2): + self.inlist = False + self.condputs("\n") + return True + elif s0s1 in ["LP", "PP", "P\n"]: + self.condputs("\n") + return True + elif s0s1 in ["ds"]: + self.skip_char(2) + self.skip_leading_whitespace() + if self.str_at(0): + # Split at whitespace + comps = self.s.split(None, 2) + if len(comps) == 2: + name, value = comps + value = value.rstrip() + self.reg_table[name] = value + self.condputs("\n") + return True + elif s0s1 in ["so", "nx"]: + # We always ignore include directives + # deroff.c for some reason allowed this to fall through to the 'tr' case + # I think that was just a bug so I won't replicate it + return True + elif s0s1 in ["tr"]: + self.skip_char(2) + self.skip_leading_whitespace() + while self.s and self.str_at(0) != "\n": + c = self.str_at(0) + ns = self.str_at(1) + self.skip_char(2) + if not ns or ns == "\n": + ns = " " + self.tr_from += c + self.tr_to += ns + + # Update our table, then swap in the slower tr-savvy condputs + try: # Python2 + self.tr = string.maketrans(self.tr_from, self.tr_to) + except AttributeError: # Python3 + self.tr = "".maketrans(self.tr_from, self.tr_to) + self.condputs = self.condputs_tr + + return True + elif s0s1 in ["sp"]: + self.condputs("\n") + return True + else: + self.condputs("\n") + return True + + if self.skipheaders and self.nobody: + return True + + self.skip_leading_whitespace() + while self.s and not self.is_white(0): + self.skip_char() + self.skip_leading_whitespace() + while True: + if not self.quoted_arg() and not self.text_arg(): + if self.s: + self.condputs(self.str_at(0)) + self.skip_char() + else: + return True + + def do_tbl(self): + if self.tblstate == self.OPTIONS: + while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n": + self.skip_leading_whitespace() + if not self.str_at(0).isalpha(): + # deroff.c has a bug where it can loop forever here...we try to work around it + self.skip_char() + else: # Parse option + option = self.s + arg = "" + + idx = 0 + while option[idx : idx + 1].isalpha(): + idx += 1 + + if option[idx : idx + 1] == "(": + option = option[:idx] + self.s = self.s[idx + 1 :] + arg = self.s + else: + self.s = "" + + if arg: + idx = arg.find(")") + if idx != -1: + arg = arg[:idx] + self.s = self.s[idx + 1 :] + else: + # self.skip_char() + pass + + if option.lower() == "tab": + self.tblTab = arg[0:1] + + self.tblstate = self.FORMAT + self.condputs("\n") + + elif self.tblstate == self.FORMAT: + while self.s and self.str_at(0) != "." and self.str_at(0) != "\n": + self.skip_leading_whitespace() + if self.str_at(0): + self.skip_char() + + if self.str_at(0) == ".": + self.tblstate = self.DATA + self.condputs("\n") + elif self.tblstate == self.DATA: + if self.tblTab: + self.s = self.s.replace(self.tblTab, "\t") + self.text() + return True + + def do_line(self): + if self.s[0:1] in ".'": + if not self.request_or_macro(): + return False + elif self.tbl: + self.do_tbl() + else: + self.text() + return True + + def deroff(self, str): + lines = str.split("\n") + for line in lines: + self.s = line + "\n" + if not self.do_line(): + break + # self.putchar('\n') + + # This gets set to the name of the command that we are currently executing CMDNAME = "" diff --git a/share/tools/deroff.py b/share/tools/deroff.py deleted file mode 100755 index 1f7017901..000000000 --- a/share/tools/deroff.py +++ /dev/null @@ -1,1149 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Deroff.py, ported to Python from the venerable deroff.c""" - -import sys, re, string - -IS_PY3 = sys.version_info[0] >= 3 - - -class Deroffer: - g_specs_specletter = { - # Output composed latin1 letters - "-D": "\320", - "Sd": "\360", - "Tp": "\376", - "TP": "\336", - "AE": "\306", - "ae": "\346", - "OE": "OE", - "oe": "oe", - ":a": "\344", - ":A": "\304", - ":e": "\353", - ":E": "\313", - ":i": "\357", - ":I": "\317", - ":o": "\366", - ":O": "\326", - ":u": "\374", - ":U": "\334", - ":y": "\377", - "ss": "\337", - "'A": "\301", - "'E": "\311", - "'I": "\315", - "'O": "\323", - "'U": "\332", - "'Y": "\335", - "'a": "\341", - "'e": "\351", - "'i": "\355", - "'o": "\363", - "'u": "\372", - "'y": "\375", - "^A": "\302", - "^E": "\312", - "^I": "\316", - "^O": "\324", - "^U": "\333", - "^a": "\342", - "^e": "\352", - "^i": "\356", - "^o": "\364", - "^u": "\373", - "`A": "\300", - "`E": "\310", - "`I": "\314", - "`O": "\322", - "`U": "\331", - "`a": "\340", - "`e": "\350", - "`i": "\354", - "`o": "\362", - "`u": "\371", - "~A": "\303", - "~N": "\321", - "~O": "\325", - "~a": "\343", - "~n": "\361", - "~o": "\365", - ",C": "\307", - ",c": "\347", - "/l": "/l", - "/L": "/L", - "/o": "\370", - "/O": "\330", - "oA": "\305", - "oa": "\345", - # Ligatures - "fi": "fi", - "ff": "ff", - "fl": "fl", - "Fi": "ffi", - "Ff": "fff", - "Fl": "ffl", - } - - g_specs = { - "mi": "-", - "en": "-", - "hy": "-", - "em": "--", - "lq": "“", - "rq": "”", - "Bq": ",,", - "oq": "`", - "cq": "'", - "aq": "'", - "dq": '"', - "or": "|", - "at": "@", - "sh": "#", - "Eu": "\244", - "eu": "\244", - "Do": "$", - "ct": "\242", - "Fo": "\253", - "Fc": "\273", - "fo": "<", - "fc": ">", - "r!": "\241", - "r?": "\277", - "Of": "\252", - "Om": "\272", - "pc": "\267", - "S1": "\271", - "S2": "\262", - "S3": "\263", - "<-": "<-", - "->": "->", - "<>": "<->", - "ua": "^", - "da": "v", - "lA": "<=", - "rA": "=>", - "hA": "<=>", - "uA": "^^", - "dA": "vv", - "ba": "|", - "bb": "|", - "br": "|", - "bv": "|", - "ru": "_", - "ul": "_", - "ci": "O", - "bu": "o", - "co": "\251", - "rg": "\256", - "tm": "(TM)", - "dd": "||", - "dg": "|", - "ps": "\266", - "sc": "\247", - "de": "\260", - "%0": "0/00", - "14": "\274", - "12": "\275", - "34": "\276", - "f/": "/", - "sl": "/", - "rs": "\\", - "sq": "[]", - "fm": "'", - "ha": "^", - "ti": "~", - "lB": "[", - "rB": "]", - "lC": "{", - "rC": "}", - "la": "<", - "ra": ">", - "lh": "<=", - "rh": "=>", - "tf": "therefore", - "~~": "~~", - "~=": "~=", - "!=": "!=", - "**": "*", - "+-": "\261", - "<=": "<=", - "==": "==", - "=~": "=~", - ">=": ">=", - "AN": "\\/", - "OR": "/\\", - "no": "\254", - "te": "there exists", - "fa": "for all", - "Ah": "aleph", - "Im": "imaginary", - "Re": "real", - "if": "infinity", - "md": "\267", - "mo": "member of", - "mu": "\327", - "nm": "not member of", - "pl": "+", - "eq": "=", - "pt": "oc", - "pp": "perpendicular", - "sb": "(=", - "sp": "=)", - "ib": "(-", - "ip": "-)", - "ap": "~", - "is": "I", - "sr": "root", - "pd": "d", - "c*": "(x)", - "c+": "(+)", - "ca": "cap", - "cu": "U", - "di": "\367", - "gr": "V", - "es": "{}", - "CR": "_|", - "st": "such that", - "/_": "/_", - "lz": "<>", - "an": "-", - # Output Greek - "*A": "Alpha", - "*B": "Beta", - "*C": "Xi", - "*D": "Delta", - "*E": "Epsilon", - "*F": "Phi", - "*G": "Gamma", - "*H": "Theta", - "*I": "Iota", - "*K": "Kappa", - "*L": "Lambda", - "*M": "Mu", - "*N": "Nu", - "*O": "Omicron", - "*P": "Pi", - "*Q": "Psi", - "*R": "Rho", - "*S": "Sigma", - "*T": "Tau", - "*U": "Upsilon", - "*W": "Omega", - "*X": "Chi", - "*Y": "Eta", - "*Z": "Zeta", - "*a": "alpha", - "*b": "beta", - "*c": "xi", - "*d": "delta", - "*e": "epsilon", - "*f": "phi", - "+f": "phi", - "*g": "gamma", - "*h": "theta", - "+h": "theta", - "*i": "iota", - "*k": "kappa", - "*l": "lambda", - "*m": "\265", - "*n": "nu", - "*o": "omicron", - "*p": "pi", - "+p": "omega", - "*q": "psi", - "*r": "rho", - "*s": "sigma", - "*t": "tau", - "*u": "upsilon", - "*w": "omega", - "*x": "chi", - "*y": "eta", - "*z": "zeta", - "ts": "sigma", - } - - g_re_word = re.compile(r"[a-zA-Z_]+") # equivalent to the word() method - g_re_number = re.compile(r"[+-]?\d+") # equivalent to the number() method - g_re_esc_char = re.compile( - r"""([a-zA-Z_]) | # Word - ([+-]?\d) | # Number - \\ # Backslash (for escape seq) - """, - re.VERBOSE, - ) - - g_re_not_backslash_or_whitespace = re.compile( - r"[^ \t\n\r\f\v\\]+" - ) # Match a sequence of not backslash or whitespace - - g_re_newline_collapse = re.compile(r"\n{3,}") - - g_re_font = re.compile( - r"""\\f( # Starts with backslash f - (\(\S{2}) | # Open paren, then two printable chars - (\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket - \S) # Any printable character - """, - re.VERBOSE, - ) - - # This gets filled in in __init__ below - g_macro_dict = False - - def __init__(self): - self.reg_table = {} - self.tr_from = "" - self.tr_to = "" - self.tr = "" - self.nls = 2 - self.specletter = False - self.refer = False - self.macro = 0 - self.nobody = False - self.inlist = False - self.inheader = False - self.pic = False - self.tbl = False - self.tblstate = 0 - self.tblTab = "" - self.eqn = False - self.skipheaders = False - self.skiplists = False - self.ignore_sonx = False - self.output = [] - self.name = "" - - self.OPTIONS = 0 - self.FORMAT = 1 - self.DATA = 2 - - # words is uninteresting and should be treated as false - - if not Deroffer.g_macro_dict: - Deroffer.g_macro_dict = { - "SH": Deroffer.macro_sh, - "SS": Deroffer.macro_ss_ip, - "IP": Deroffer.macro_ss_ip, - "H ": Deroffer.macro_ss_ip, - "I ": Deroffer.macro_i_ir, - "IR": Deroffer.macro_i_ir, - "IB": Deroffer.macro_i_ir, - "B ": Deroffer.macro_i_ir, - "BR": Deroffer.macro_i_ir, - "BI": Deroffer.macro_i_ir, - "R ": Deroffer.macro_i_ir, - "RB": Deroffer.macro_i_ir, - "RI": Deroffer.macro_i_ir, - "AB": Deroffer.macro_i_ir, - "Nm": Deroffer.macro_Nm, - "] ": Deroffer.macro_close_bracket, - "PS": Deroffer.macro_ps, - "PE": Deroffer.macro_pe, - "TS": Deroffer.macro_ts, - "T&": Deroffer.macro_t_and, - "TE": Deroffer.macro_te, - "EQ": Deroffer.macro_eq, - "EN": Deroffer.macro_en, - "R1": Deroffer.macro_r1, - "R2": Deroffer.macro_r2, - "de": Deroffer.macro_de, - "BL": Deroffer.macro_bl_vl, - "VL": Deroffer.macro_bl_vl, - "AL": Deroffer.macro_bl_vl, - "LB": Deroffer.macro_bl_vl, - "RL": Deroffer.macro_bl_vl, - "ML": Deroffer.macro_bl_vl, - "DL": Deroffer.macro_bl_vl, - "BV": Deroffer.macro_bv, - "LE": Deroffer.macro_le, - "LP": Deroffer.macro_lp_pp, - "PP": Deroffer.macro_lp_pp, - "P\n": Deroffer.macro_lp_pp, - "ds": Deroffer.macro_ds, - "so": Deroffer.macro_so_nx, - "nx": Deroffer.macro_so_nx, - "tr": Deroffer.macro_tr, - "sp": Deroffer.macro_sp, - } - - def flush_output(self, where): - if where: - where.write(self.get_output()) - self.output[:] = [] - - def get_output(self): - res = "".join(self.output) - clean_res = Deroffer.g_re_newline_collapse.sub("\n", res) - return clean_res - - def putchar(self, c): - self.output.append(c) - return c - - # This gets swapped in in place of condputs the first time tr gets modified - def condputs_tr(self, str): - special = ( - self.pic - or self.eqn - or self.refer - or self.macro - or (self.skiplists and self.inlist) - or (self.skipheaders and self.inheader) - ) - if not special: - self.output.append(str.translate(self.tr)) - - def condputs(self, str): - special = ( - self.pic - or self.eqn - or self.refer - or self.macro - or (self.skiplists and self.inlist) - or (self.skipheaders and self.inheader) - ) - if not special: - self.output.append(str) - - def str_at(self, idx): - return self.s[idx : idx + 1] - - def skip_char(self, amt=1): - self.s = self.s[amt:] - - def skip_leading_whitespace(self): - self.s = self.s.lstrip() - - def is_white(self, idx): - # Note this returns false for empty strings (idx >= len(self.s)) - return self.s[idx : idx + 1].isspace() - - def str_eq(offset, other, len): - return self.s[offset : offset + len] == other[:len] - - def prch(self, idx): - # Note that this return False for the empty string (idx >= len(self.s)) - ch = self.s[idx : idx + 1] - return ch not in " \t\n" - - def font(self): - match = Deroffer.g_re_font.match(self.s) - if not match: - return False - self.skip_char(match.end()) - return True - - def font2(self): - if self.s[0:2] == "\\f": - c = self.str_at(2) - if c == "(" and self.prch(3) and self.prch(4): - self.skip_char(5) - return True - elif c == "[": - self.skip_char(2) - while self.prch(0) and self.str_at(0) != "]": - self.skip_char() - if self.str_at(0) == "]": - self.skip_char() - elif self.prch(2): - self.skip_char(3) - return True - return False - - def comment(self): - # Here we require that the string start with \" - while self.str_at(0) and self.str_at(0) != "\n": - self.skip_char() - return True - - def numreq(self): - # We require that the string starts with backslash - if self.str_at(1) in "hvwud" and self.str_at(2) == "'": - self.macro += 1 - self.skip_char(3) - while self.str_at(0) != "'" and self.esc_char(): - pass # Weird - if self.str_at(0) == "'": - self.skip_char() - self.macro -= 1 - return True - return False - - def var(self): - reg = "" - s0s1 = self.s[0:2] - if s0s1 == "\\n": - if self.s[3:5] == "dy": - self.skip_char(5) - return True - elif self.str_at(2) == "(" and self.prch(3) and self.prch(4): - self.skip_char(5) - return True - elif self.str_at(2) == "[" and self.prch(3): - self.skip_char(3) - while self.str_at(0) and self.str_at(0) != "]": - self.skip_char() - return True - elif self.prch(2): - self.skip_char(3) - return True - elif s0s1 == "\\*": - if self.str_at(2) == "(" and self.prch(3) and self.prch(4): - reg = self.s[3:5] - self.skip_char(5) - elif self.str_at(2) == "[" and self.prch(3): - self.skip_char(3) - while self.str_at(0) and self.str_at(0) != "]": - reg = reg + self.str_at(0) - self.skip_char() - if self.s[0:1] == "]": - self.skip_char() - else: - return False - elif self.prch(2): - reg = self.str_at(2) - self.skip_char(3) - else: - return False - - if reg in self.reg_table: - old_s = self.s - self.s = self.reg_table[reg] - self.text_arg() - return True - return False - - def size(self): - # We require that the string starts with \s - if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)): - self.skip_char(3) - while self.digit(0): - self.skip_char() - return True - return False - - def spec(self): - self.specletter = False - if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3): - key = self.s[2:4] - if key in Deroffer.g_specs_specletter: - self.condputs(Deroffer.g_specs_specletter[key]) - self.specletter = True - elif key in Deroffer.g_specs: - self.condputs(Deroffer.g_specs[key]) - self.skip_char(4) - return True - elif self.s.startswith("\\%"): - self.specletter = True - self.skip_char(2) - return True - else: - return False - - def esc(self): - # We require that the string start with backslash - c = self.s[1:2] - if not c: - return False - if c in "eE": - self.condputs("\\") - elif c in "t": - self.condputs("\t") - elif c in "0~": - self.condputs(" ") - elif c in "|^&:": - pass - else: - self.condputs(c) - self.skip_char(2) - return True - - def word(self): - got_something = False - while True: - match = Deroffer.g_re_word.match(self.s) - if not match: - break - got_something = True - self.condputs(match.group(0)) - self.skip_char(match.end(0)) - - # Consume all specials - while self.spec(): - if not self.specletter: - break - - return got_something - - def text(self): - while True: - idx = self.s.find("\\") - if idx == -1: - self.condputs(self.s) - self.s = "" - break - else: - self.condputs(self.s[:idx]) - self.skip_char(idx) - if not self.esc_char_backslash(): - self.condputs(self.str_at(0)) - self.skip_char() - return True - - def letter(self, idx): - ch = self.str_at(idx) - return ch.isalpha() or ch == "_" # underscore is used in C identifiers - - def digit(self, idx): - ch = self.str_at(idx) - return ch.isdigit() - - def number(self): - match = Deroffer.g_re_number.match(self.s) - if not match: - return False - else: - self.condputs(match.group(0)) - self.skip_char(match.end()) - return True - - def esc_char_backslash(self): - # Like esc_char, but we know the string starts with a backslash - c = self.s[1:2] - if c == '"': - return self.comment() - elif c == "f": - return self.font() - elif c == "s": - return self.size() - elif c in "hvwud": - return self.numreq() - elif c in "n*": - return self.var() - elif c == "(": - return self.spec() - else: - return self.esc() - - def esc_char(self): - if self.s[0:1] == "\\": - return self.esc_char_backslash() - return self.word() or self.number() - - def quoted_arg(self): - if self.str_at(0) == '"': - self.skip_char() - while self.s and self.str_at(0) != '"': - if not self.esc_char(): - if self.s: - self.condputs(self.str_at(0)) - self.skip_char() - return True - else: - return False - - def text_arg(self): - # PCA: The deroff.c textArg() disallowed quotes at the start of an argument - # I'm not sure if this was a bug or not - got_something = False - while True: - match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s) - if match: - # Output the characters in the match - self.condputs(match.group(0)) - self.skip_char(match.end(0)) - got_something = True - - # Next is either an escape, or whitespace, or the end - # If it's the whitespace or the end, we're done - if not self.s or self.is_white(0): - return got_something - - # Try an escape - if not self.esc_char(): - # Some busted escape? Just output it - self.condputs(self.str_at(0)) - self.skip_char() - got_something = True - - def text_arg2(self): - if not self.esc_char(): - if self.s and not self.is_white(0): - self.condputs(self.str_at(0)) - self.skip_char() - else: - return False - while True: - if not self.esc_char(): - if self.s and not self.is_white(0): - self.condputs(self.str_at(0)) - self.skip_char() - else: - return True - - # Macro functions - def macro_sh(self): - for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']: - if self.s[2:].startswith(header_str): - self.inheader = True - break - else: - # Did not find a header string - self.inheader = False - self.nobody = True - - def macro_ss_ip(self): - self.nobody = True - return False - - def macro_i_ir(self): - return False - - def macro_Nm(self): - if self.s == "Nm\n": - self.condputs(self.name) - else: - self.name = self.s[3:].strip() + " " - return True - - def macro_close_bracket(self): - self.refer = False - return False - - def macro_ps(self): - if self.is_white(2): - self.pic = True - self.condputs("\n") - return True - - def macro_pe(self): - if self.is_white(2): - self.pic = False - self.condputs("\n") - return True - - def macro_ts(self): - if self.is_white(2): - self.tbl, self.tblstate = True, self.OPTIONS - self.condputs("\n") - return True - - def macro_t_and(self): - if self.is_white(2): - self.tbl, self.tblstate = True, self.FORMAT - self.condputs("\n") - return True - - def macro_te(self): - if self.is_white(2): - self.tbl = False - self.condputs("\n") - return True - - def macro_eq(self): - if self.is_white(2): - self.eqn = True - self.condputs("\n") - return True - - def macro_en(self): - if self.is_white(2): - self.eqn = False - self.condputs("\n") - return True - - def macro_r1(self): - if self.is_white(2): - self.refer2 = True - self.condputs("\n") - return True - - def macro_r2(self): - if self.is_white(2): - self.refer2 = False - self.condputs("\n") - return True - - def macro_de(self): - macro = True - self.condputs("\n") - return True - - def macro_bl_vl(self): - if self.is_white(2): - self.inlist = True - self.condputs("\n") - return True - - def macro_bv(self): - if self.str_at(2) == "L" and self.white(self.str_at(3)): - self.inlist = True - self.condputs("\n") - return True - - def macro_le(self): - if self.is_white(2): - self.inlist = False - self.condputs("\n") - return True - - def macro_lp_pp(self): - self.condputs("\n") - return True - - def macro_ds(self): - self.skip_char(2) - self.skip_leading_whitespace() - if self.str_at(0): - # Split at whitespace - comps = self.s.split(None, 2) - if len(comps) == 2: - name, value = comps - value = value.rstrip() - self.reg_table[name] = value - self.condputs("\n") - return True - - def macro_so_nx(self): - # We always ignore include directives - # deroff.c for some reason allowed this to fall through to the 'tr' case - # I think that was just a bug so I won't replicate it - return True - - def macro_tr(self): - self.skip_char(2) - self.skip_leading_whitespace() - while self.s and self.str_at(0) != "\n": - c = self.str_at(0) - ns = self.str_at(1) - self.skip_char(2) - if not ns or ns == "\n": - ns = " " - self.tr_from += c - self.tr_to += ns - - # Update our table, then swap in the slower tr-savvy condputs - try: # Python2 - self.tr = string.maketrans(self.tr_from, self.tr_to) - except AttributeError: # Python3 - self.tr = "".maketrans(self.tr_from, self.tr_to) - self.condputs = self.condputs_tr - return True - - def macro_sp(self): - self.condputs("\n") - return True - - def macro_other(self): - self.condputs("\n") - return True - - def request_or_macro(self): - # s[0] is period or open single quote - self.skip_char() - s0 = self.s[1:2] - if s0 == "\\": - if self.str_at(1) == '"': - self.condputs("\n") - return True - else: - pass - elif s0 == "[": - self.refer = True - self.condputs("\n") - return True - elif s0 == "]": - self.refer = False - self.skip_char() - return self.text() - elif s0 == ".": - self.macro = False - self.condputs("\n") - return True - - self.nobody = False - s0s1 = self.s[0:2] - - macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other) - if macro_func(self): - return True - - if self.skipheaders and self.nobody: - return True - - self.skip_leading_whitespace() - while self.s and not self.is_white(0): - self.skip_char() - self.skip_leading_whitespace() - while True: - if not self.quoted_arg() and not self.text_arg(): - if self.s: - self.condputs(self.str_at(0)) - self.skip_char() - else: - return True - - def request_or_macro2(self): - self.skip_char() - s0 = self.s[0:1] - if s0 == "\\": - if self.str_at(1) == '"': - self.condputs("\n") - return True - else: - pass - elif s0 == "[": - self.refer = True - self.condputs("\n") - return True - elif s0 == "]": - self.refer = False - self.skip_char() - return self.text() - elif s0 == ".": - self.macro = False - self.condputs("\n") - return True - - self.nobody = False - s0s1 = self.s[0:2] - if s0s1 == "SH": - for header_str in [" SYNOPSIS", ' "SYNOPSIS', " ‹BERSICHT", ' "‹BERSICHT']: - if self.s[2:].startswith(header_str): - self.inheader = True - break - else: - # Did not find a header string - self.inheader = False - self.nobody = True - elif s0s1 in ["SS", "IP", "H "]: - self.nobody = True - elif s0s1 in ["I ", "IR", "IB", "B ", "BR", "BI", "R ", "RB", "RI", "AB"]: - pass - elif s0s1 in ["] "]: - self.refer = False - elif s0s1 in ["PS"]: - if self.is_white(2): - self.pic = True - self.condputs("\n") - return True - elif s0s1 in ["PE"]: - if self.is_white(2): - self.pic = False - self.condputs("\n") - return True - elif s0s1 in ["TS"]: - if self.is_white(2): - self.tbl, self.tblstate = True, self.OPTIONS - self.condputs("\n") - return True - elif s0s1 in ["T&"]: - if self.is_white(2): - self.tbl, self.tblstate = True, self.FORMAT - self.condputs("\n") - return True - elif s0s1 in ["TE"]: - if self.is_white(2): - self.tbl = False - self.condputs("\n") - return True - elif s0s1 in ["EQ"]: - if self.is_white(2): - self.eqn = True - self.condputs("\n") - return True - elif s0s1 in ["EN"]: - if self.is_white(2): - self.eqn = False - self.condputs("\n") - return True - elif s0s1 in ["R1"]: - if self.is_white(2): - self.refer2 = True - self.condputs("\n") - return True - elif s0s1 in ["R2"]: - if self.is_white(2): - self.refer2 = False - self.condputs("\n") - return True - elif s0s1 in ["de"]: - macro = True - self.condputs("\n") - return True - elif s0s1 in ["BL", "VL", "AL", "LB", "RL", "ML", "DL"]: - if self.is_white(2): - self.inlist = True - self.condputs("\n") - return True - elif s0s1 in ["BV"]: - if self.str_at(2) == "L" and self.white(self.str_at(3)): - self.inlist = True - self.condputs("\n") - return True - elif s0s1 in ["LE"]: - if self.is_white(2): - self.inlist = False - self.condputs("\n") - return True - elif s0s1 in ["LP", "PP", "P\n"]: - self.condputs("\n") - return True - elif s0s1 in ["ds"]: - self.skip_char(2) - self.skip_leading_whitespace() - if self.str_at(0): - # Split at whitespace - comps = self.s.split(None, 2) - if len(comps) == 2: - name, value = comps - value = value.rstrip() - self.reg_table[name] = value - self.condputs("\n") - return True - elif s0s1 in ["so", "nx"]: - # We always ignore include directives - # deroff.c for some reason allowed this to fall through to the 'tr' case - # I think that was just a bug so I won't replicate it - return True - elif s0s1 in ["tr"]: - self.skip_char(2) - self.skip_leading_whitespace() - while self.s and self.str_at(0) != "\n": - c = self.str_at(0) - ns = self.str_at(1) - self.skip_char(2) - if not ns or ns == "\n": - ns = " " - self.tr_from += c - self.tr_to += ns - - # Update our table, then swap in the slower tr-savvy condputs - try: # Python2 - self.tr = string.maketrans(self.tr_from, self.tr_to) - except AttributeError: # Python3 - self.tr = "".maketrans(self.tr_from, self.tr_to) - self.condputs = self.condputs_tr - - return True - elif s0s1 in ["sp"]: - self.condputs("\n") - return True - else: - self.condputs("\n") - return True - - if self.skipheaders and self.nobody: - return True - - self.skip_leading_whitespace() - while self.s and not self.is_white(0): - self.skip_char() - self.skip_leading_whitespace() - while True: - if not self.quoted_arg() and not self.text_arg(): - if self.s: - self.condputs(self.str_at(0)) - self.skip_char() - else: - return True - - def do_tbl(self): - if self.tblstate == self.OPTIONS: - while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n": - self.skip_leading_whitespace() - if not self.str_at(0).isalpha(): - # deroff.c has a bug where it can loop forever here...we try to work around it - self.skip_char() - else: # Parse option - option = self.s - arg = "" - - idx = 0 - while option[idx : idx + 1].isalpha(): - idx += 1 - - if option[idx : idx + 1] == "(": - option = option[:idx] - self.s = self.s[idx + 1 :] - arg = self.s - else: - self.s = "" - - if arg: - idx = arg.find(")") - if idx != -1: - arg = arg[:idx] - self.s = self.s[idx + 1 :] - else: - # self.skip_char() - pass - - if option.lower() == "tab": - self.tblTab = arg[0:1] - - self.tblstate = self.FORMAT - self.condputs("\n") - - elif self.tblstate == self.FORMAT: - while self.s and self.str_at(0) != "." and self.str_at(0) != "\n": - self.skip_leading_whitespace() - if self.str_at(0): - self.skip_char() - - if self.str_at(0) == ".": - self.tblstate = self.DATA - self.condputs("\n") - elif self.tblstate == self.DATA: - if self.tblTab: - self.s = self.s.replace(self.tblTab, "\t") - self.text() - return True - - def do_line(self): - if self.s[0:1] in ".'": - if not self.request_or_macro(): - return False - elif self.tbl: - self.do_tbl() - else: - self.text() - return True - - def deroff(self, str): - lines = str.split("\n") - for line in lines: - self.s = line + "\n" - if not self.do_line(): - break - # self.putchar('\n') - - -def deroff_files(files): - for arg in files: - sys.stderr.write(arg + "\n") - if arg.endswith(".gz"): - f = gzip.open(arg, "r") - str = f.read() - if IS_PY3: - str = str.decode("latin-1") - else: - f = open(arg, "r") - str = f.read() - d = Deroffer() - d.deroff(str) - d.flush_output(sys.stdout) - f.close() - - -if __name__ == "__main__": - import gzip - - paths = sys.argv[1:] - deroff_files(paths) - # import cProfile, profile, pstats - # profile.run("deroff_files(paths)", "fooprof") - # p = pstats.Stats("fooprof") - # p.sort_stats("time").print_stats(100) - # p.sort_stats('calls').print_callers(.5, 'startswith')