我的程序的基本目的是从 C 文件中提取注释并将它们显示出来。
import urllib2
import html2text
import re
import subprocess
from cStringIO import StringIO
# Download the page named by the user, render its HTML as plain text, save
# that text to file_text.txt, then read it back into `buf`.
url = raw_input('Please input URL youd like to analyze: ')
page = urllib2.urlopen(url)
try:
    html_content = page.read().decode('utf8')
finally:
    # Release the connection even if reading or decoding fails.
    page.close()

# The assignment must stay on one logical line; characters that cannot be
# represented in ASCII are dropped by the 'ignore' error handler.
rendered_content = html2text.html2text(html_content).encode('ascii', 'ignore')

# `with` guarantees the handles are closed (and the data flushed) before the
# file is reopened for reading.
with open('file_text.txt', 'wb') as f:
    f.write(rendered_content)

with open("file_text.txt", "r") as fd:
    buf = fd.read()
# Stackoverflow - properly removes all comments.
# Alternatives, in order: `//` line comment, `/* ... */` block comment,
# single-quoted literal, double-quoted literal.  Quoted literals are matched
# so that comment markers inside them are not mistaken for comments.
# Compiled once at import time instead of on every call.
_COMMENT_OR_LITERAL = re.compile(
    r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
    re.DOTALL | re.MULTILINE
)


def comment_remover(text):
    """Return *text* with C-style comments replaced by a single space.

    Both `//` line comments and `/* ... */` block comments (including ones
    spanning several lines) are replaced.  Single- and double-quoted
    literals are returned untouched, so a `//` or `/*` inside a literal
    survives.

    Args:
        text: Source text to strip.

    Returns:
        The text with every comment replaced by one space character.
    """
    def replacer(match):
        s = match.group(0)
        if s.startswith('/'):
            # A comment: substitute a space (not an empty string) so the
            # tokens on either side of it do not get glued together.
            return " "
        # A quoted literal: keep it exactly as written.
        return s

    return _COMMENT_OR_LITERAL.sub(replacer, text)
# Feed the comment-stripped text to sed and capture what it prints.
# Popen accepts stdin=/stdout= (there are no input=/output= keywords), and
# those must be real pipes or file descriptors, not in-memory StringIO objects.
# `-f` makes sed read its commands from the script file; without it the path
# itself would be parsed as a sed expression.
process = subprocess.Popen(
    ['sed', '-f', '/path/to/remccoms3.sed'],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE
)
# communicate() writes the input, closes stdin, reads stdout to EOF and waits
# for the process to exit, avoiding a pipe-buffer deadlock.
stripped_code = process.communicate(comment_remover(buf))[0]
return_code = process.returncode
这是我收到错误消息的地方:
from linecache import getline
# Walk file_text.txt line by line (numbering from 1). For every line that is
# exactly "---|---" once trailing whitespace is stripped, print the line that
# sits four lines below it, fetched through linecache.getline.
with open("file_text.txt") as text_file:
    for line_no, row in enumerate(text_file, 1):
        if row.rstrip() != "---|---":
            continue
        print(getline(text_file.name, line_no + 4))
基本上，`line.rstrip() == "---|---"` 这一行就是问题所在：运行程序时，它显示的是实际页面本身的注释，而不是来自源代码的注释。