辰东,斗破苍穹续集,懒人听书

新聞中心

這裡有您想知道的互聯網營銷解決方案

Python分割器教你給文章做手術

Python分割器在我們進行文章分割的時候會經常用到。當然一篇相當長的文章會讓你有些頭疼。看完下面的代碼，希望大家能夠熟練地使用Python分割器進行文章分割。

 
 
 
  
  
  # 將txt小說分割轉換成多個HTML文件
  
  
  # @author : GreatGhoul   
  
  
  # @email : [email protected]   
  
  
  # @blog : http://greatghoul.javaeye.com   
  
  
  import re   
  
  
  import os   
  
  
  # regex for the section title   
  
  
  # sec_re = re.compile(r'第.+卷\s+.+\s+第.+章\s+.+')   
  
  
  # txt book's path.   
  
  
  # Path of the plain-text novel to split.
  source_path = 'f:\\傭兵天下.txt'

  # (directory, file name) of the source file.
  path_pieces = os.path.split(source_path)

  # File name with everything from the first dot onwards removed.
  novel_title = re.sub(r'\..*$', '', path_pieces[1])

  # Output folder "<title>_html" next to the source file.  os.path.join is
  # used so the separator is not lost when the source is not in a drive root.
  target_path = os.path.join(path_pieces[0], '%s_html' % novel_title)

  # A line that starts a new section: optional leading whitespace, then
  # "第...卷", whitespace, and the rest of the title.
  section_re = re.compile(r'^\s*第.+卷\s+.*$')

  # Page header template.  It takes exactly two %s arguments: the <title>
  # text and the visible heading.
  # NOTE(review): the markup of this template was stripped when the script was
  # published; only the CSS declarations, the two placeholders and the
  # "去頁尾" link text survived.  The tag names below are a reconstruction --
  # confirm against the author's original if it is available.
  section_head = '''<html>
  <head>
  <title>%s</title>
  </head>
  <body style="margin:0;padding: 20px; background:#FAFAD2;color:#2B4B86;text-align:center;">
  <h2>%s</h2>
  <a href="#footer">去頁尾</a>
  <hr/>
  '''
  
  
  # escape xml/html   
  
  
  def escape_xml(code):
      """Return ``code`` made safe for embedding in HTML/XML text.

      ``&``, ``<`` and ``>`` are replaced by their character entities, each
      tab becomes four spaces, and every remaining whitespace character
      (including a trailing newline) becomes a single space.

      The ampersand is handled first; escaping it after ``<`` and ``>`` would
      turn the freshly written ``&lt;`` into ``&amp;lt;``.
      """
      text = code.replace('&', '&amp;')
      text = text.replace('<', '&lt;')
      text = text.replace('>', '&gt;')
      text = text.replace('\t', '    ')
      # NOTE(review): the published listing had its entities decoded, so this
      # replacement may originally have been '&nbsp;' -- confirm.
      text = re.sub(r'\s', ' ', text)
      return text
  
  
  # entry of the script   
  
  
  def _nav_footer(index, has_prev, has_next):
      """Build the closing HTML of section page ``index``.

      The result holds the navigation links (previous / contents / next /
      back-to-top, separated by " | ") followed by the closing body and html
      tags.  ``has_prev`` and ``has_next`` decide whether the links to the
      neighbouring section pages are emitted.
      """
      links = []
      if has_prev:
          links.append('<a href="%d.html">上一篇</a>' % (index - 1))
      links.append('<a href="index.html">目錄</a>')
      if has_next:
          links.append('<a href="%d.html">下一篇</a>' % (index + 1))
      links.append('<a href="#">回頁首</a>')
      return ('<hr/>\n<div id="footer">%s</div>\n</body>\n</html>\n'
              % ' | '.join(links))


  def _write_page(file_name, parts):
      """Write the string fragments ``parts`` to ``file_name`` in target_path."""
      with open(os.path.join(target_path, file_name), 'w') as page_file:
          page_file.writelines(parts)


  def main():
      """Split the novel at ``source_path`` into one HTML page per section.

      Text before the first section title goes to ``0.html`` (the preface);
      every line matching ``section_re`` starts the next numbered page.  Blank
      lines are skipped and every other line becomes one paragraph.  Finally
      ``index.html`` is written with a link to each page.

      NOTE(review): the HTML fragments were stripped from the published
      listing; the tags used here are a reconstruction -- confirm.
      """
      # create the output folder
      if not os.path.exists(target_path):
          os.mkdir(target_path)

      sec_count = 0
      preface_title = escape_xml('%s 前言' % novel_title)
      # fragments of the page currently being assembled
      page = [section_head % (preface_title, preface_title)]
      # one <li> per generated page, used for the index
      idx_cache = ['<li><a href="%d.html">%s</a></li>\n'
                   % (sec_count, escape_xml(novel_title))]

      with open(source_path, 'r') as source:
          for line in source:
              if not line.strip():
                  continue
              if section_re.match(line):
                  # a section title: finish the current page, start a new one
                  title = re.sub(r'\s+', ' ', line).strip()
                  print('converting %s...' % title)
                  page.append(_nav_footer(sec_count, sec_count > 0, True))
                  _write_page('%d.html' % sec_count, page)

                  sec_count += 1
                  safe_title = escape_xml(title)
                  page = [section_head % (safe_title, safe_title)]
                  idx_cache.append('<li><a href="%d.html">%s</a></li>\n'
                                   % (sec_count, safe_title))
              else:
                  page.append('<p>%s</p>\n' % escape_xml(line))

      # The last page has no successor, and a predecessor only if at least one
      # section title was found.
      page.append(_nav_footer(sec_count, sec_count > 0, False))
      _write_page('%d.html' % sec_count, page)

      # write the menu
      menu_head = escape_xml('%s 目錄' % novel_title)
      menu = [section_head % (menu_head, menu_head), '<ul>\n']
      menu.extend(idx_cache)
      menu.append('</ul>\n</body>\n</html>\n')
      _write_page('index.html', menu)

      print('completed. %d chapter(s) in total.' % sec_count)
  
  
  # Run the conversion only when executed as a script, not when imported.
  if __name__ == '__main__':
      main()

以上就是對Python分割器的相關介紹，希望大家有所收穫。

【編輯推薦】

Python數據編組對文字串的讀寫
Python 拼寫檢查如何更簡單的使用
Python函數變量在應用中的“竅門”
在Python函數變量中如何使用global語句簡介
Python編程語言為何受到眾人的追捧

本文標題：Python分割器教你給文章做手術
鏈接分享：http://fisionsoft.com.cn/article/cooohgh.html

新聞中心

%s

其他資訊