我已经阅读了本网站或 Stackoverflow 上关于此主题的几乎所有答案,但未能解决以下问题。
当我从 PDF 文件复制文本并将其粘贴到文本文件file.txt
中时,文本看起来很正常,但是当我使用 cat 命令时:
cat -v file.txt
输出是:
vbox = None
M-BM- M-BM- M-BM- M-BM- def __init__(self, title="Error!", parent=None,
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- flags=Gtk.DialogFlags.MODAL, buttons=("NO",
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- Gtk.ResponseType.NO, "_YES",
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- Gtk.ResponseType.YES)):
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- super().__init__(title=title, parent=parent, flags=flags,
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- buttons=buttons)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- self.vbox = self.get_content_area()
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- self.hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL,
spacing=5)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- icon_theme = Gtk.IconTheme.get_default()
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- icon = icon_theme.load_icon("dialog-question", 48,
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- Gtk.IconLookupFlags.FORCE_SVG)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- image = Gtk.Image.new_from_pixbuf(icon)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- self.hbox.pack_start(image, False, False, 5)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- self.vbox.add(self.hbox)
M-BM- M-BM- M-BM- M-BM- def set_message(self, message, add_msg=None):
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- self.hbox.pack_start(Gtk.Label(message), False, False, 5)
M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- M-BM- if add_msg != None:
或者当我使用bat 命令时:
bat -A file.txt
输出是:
vbox•=•None␊
\u{a0}\u{a0}\u{a0}\u{a0}def•__init__(self,•title="Error!",•parent=None,␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}flags=Gtk.DialogFlags.MODAL,•buttons=("NO",␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}Gtk.ResponseType.NO,•"_YES",␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}Gtk.ResponseType.YES)):␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}super().__init__(title=title,•parent=parent,•flags=flags,␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}buttons=buttons)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}self.vbox•=•self.get_content_area()␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}self.hbox•=•Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL,␊
spacing=5)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}icon_theme•=•Gtk.IconTheme.get_default()␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}icon•=•icon_theme.load_icon("dialog-question",•48,␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}Gtk.IconLookupFlags.FORCE_SVG)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}image•=•Gtk.Image.new_from_pixbuf(icon)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}self.hbox.pack_start(image,•False,•False,•5)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}self.vbox.add(self.hbox)␊
\u{a0}\u{a0}\u{a0}\u{a0}def•set_message(self,•message,•add_msg=None):␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}self.hbox.pack_start(Gtk.Label(message),•False,•False,•5)␊
\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}\u{a0}if•add_msg•!=•None:␊
在 Visual Studio 代码上,当我将鼠标悬停在这些字符上时,我得到:
The character U+00a0 is not a basic ASCII character.
如何使用 sed 命令将这些字符替换为普通的“空格”字符?
看起来像不间断空格 (U+00A0)的 UTF-8 编码,字节为
c2 a0
十六进制。像sed -e 's/\xc2\xa0/ /g'
GNU sed 这样的东西应该可以用常规空格替换它们。使用Raku(以前称为 Perl_6)
以下是用 Raku 编码的解决方案,Raku 是 Perl 编程语言家族的成员。Raku 的一个优势是对 Unicode 开箱即用的高级支持,无需外部库/包/模块。
上面我只使用了“官方”Unicode 名称
 
,可以将其输入到熟悉的s///
替换运算符中\c[NO-BREAK SPACE]
(参见https://www.unicode.org/charts/PDF/U0080.pdf)。但
\c[NBSP]
下面的名称别名也有效:最后,输入十六进制也可以:
有关 Raku 中 Unicode 支持的更多信息,请参见下文。
https://www.codesections.com/blog/raku-unicode/
https://docs.raku.org/language/unicode