package com.fc2.blog55.zennin.aaacafe;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.ParagraphTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.NodeTreeWalker;
/**
 * One-shot converter that reads an exported blog HTML page and writes every
 * {@code <div class="msg">} entry as a record of a text import file
 * (AUTHOR/TITLE/.../BODY sections separated by dashes), encoded as EUC-JP.
 *
 * <p>The converter relies on the exact node order inside each message div
 * (title, date, body, footer); see {@link #execute(String[])}.
 */
public class ConvertAsablog {
    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the input HTML resource,
     *             {@code args[1]} is the output file path
     */
    public static void main(String[] args) {
        ConvertAsablog conv = new ConvertAsablog();
        conv.execute(args);
    }

    /**
     * Parses the input, walks the node tree and writes one record per
     * {@code <div class="msg">}.
     *
     * <p>Any failure is reported with a stack trace and ends the conversion;
     * the output file is closed (and whatever was already buffered is flushed)
     * in every case.
     *
     * @param args {@code args[0]} input HTML, {@code args[1]} output file
     */
    void execute(String[] args) {
        if (args == null || args.length < 2) {
            System.err.println("Usage: ConvertAsablog <input-html> <output-file>");
            return;
        }
        FileOutputStream os = null;
        BufferedWriter bw = null;
        try {
            // Output file
            os = new FileOutputStream(args[1]);
            bw = new BufferedWriter(new OutputStreamWriter(os, "EUC-JP"));
            // Input file
            Parser parser = new Parser(args[0]);
            NodeList list = parser.parse(null);
            // Extract the <html> element
            NodeList html = list.extractAllNodesThatMatch(new TagNameFilter(
                    "html"));
            if (html.size() == 0) {
                throw new IllegalStateException(
                        "No <html> element found in " + args[0]);
            }
            NodeTreeWalker ite = new NodeTreeWalker(html.elementAt(0));
            // Date conversion: source page format -> import file format
            SimpleDateFormat fmt1 = new SimpleDateFormat(
                    "yyyy年MM月dd日 HH時mm分ss秒");
            SimpleDateFormat fmt2 = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
            // Main loop
            while (ite.hasMoreNodes()) {
                Node node = ite.nextNode();
                // Skip everything except <div class="msg">
                if (!(node instanceof Div)) {
                    continue;
                }
                String clsAtr = ((Div) node).getAttribute("class");
                if (clsAtr == null || !clsAtr.equals("msg")) {
                    continue;
                }
                // The fields are located purely by position: two nodes are
                // skipped before each of title, date and body.
                skip(ite, 2);
                title = next(ite, "title").toPlainTextString();
                skip(ite, 2);
                dateStr = fmt2.format(fmt1.parse(
                        next(ite, "date").toPlainTextString()));
                skip(ite, 2);
                body = next(ite, "body").toHtml();
                // Strip the wrapping div and make every link open a new window
                body = body.replaceAll("<div class=\"msg-body\">\n\n", "");
                body = body.replaceAll("</div>", "");
                body = body.replaceAll("\">", "\" target=\"_blank\">");
                category = readCategory(ite);
                write(bw);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeOutput(bw, os);
        }
    }

    /** Advances the walker by {@code count} nodes, discarding them. */
    private static void skip(NodeTreeWalker ite, int count) {
        for (int i = 0; i < count; i++) {
            ite.nextNode();
        }
    }

    /**
     * Returns the next node, failing with a descriptive message instead of a
     * bare NullPointerException when the walker yields no node.
     */
    private static Node next(NodeTreeWalker ite, String what) {
        Node node = ite.nextNode();
        if (node == null) {
            throw new IllegalStateException(
                    "Unexpected end of document while reading " + what);
        }
        return node;
    }

    /**
     * Advances to the next {@code <p class="msg-footer">} and returns the text
     * of the second node after it as the category.
     *
     * <p>The text "コメント(0)" at that position is mapped to "Others"
     * (presumably the entry has no category link and the comment link comes
     * first; confirm against the source page layout).
     */
    private static String readCategory(NodeTreeWalker ite) {
        while (true) {
            // next() stops the search at the end of the document; without the
            // null check a missing footer would never leave this loop cleanly.
            Node node = next(ite, "msg-footer");
            if (!(node instanceof ParagraphTag)) {
                continue;
            }
            String clsAtr2 = ((ParagraphTag) node).getAttribute("class");
            if (clsAtr2 == null || !clsAtr2.equals("msg-footer")) {
                continue;
            }
            // Skip one node, then read the category
            ite.nextNode();
            String found = next(ite, "category").toPlainTextString();
            if (found.equals("コメント(0)")) {
                return "Others";
            }
            return found;
        }
    }

    /**
     * Closes the writer (which also closes the underlying stream); falls back
     * to closing the raw stream when the writer was never created.
     */
    private static void closeOutput(BufferedWriter bw, FileOutputStream os) {
        try {
            if (bw != null) {
                bw.close();
            } else if (os != null) {
                os.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Title of the entry currently being converted. */
    String title;
    /** Category of the entry currently being converted. */
    String category;
    /** Entry date, already formatted as MM/dd/yyyy HH:mm:ss. */
    String dateStr;
    /** Entry body as HTML. */
    String body;

    /**
     * Writes the current entry (title, category, dateStr, body fields) as one
     * import record. Lines are terminated with a bare carriage return.
     *
     * @param out destination writer
     * @throws IOException if writing fails
     */
    void write(BufferedWriter out) throws IOException {
        StringBuffer str = new StringBuffer();
        str.append("AUTHOR: Zennin\r");
        str.append("TITLE: " + title + "\r");
        str.append("STATUS: Publish\r");
        str.append("ALLOW COMMENTS: 1\r");
        str.append("CONVERT BREAKS: default\r");
        str.append("ALLOW PINGS: 1\r");
        str.append("PRIMARY CATEGORY: " + category + "\r");
        str.append("CATEGORY: " + category + "\r");
        str.append("\r");
        str.append("DATE: " + dateStr + "\r");
        str.append("-----\r");
        str.append("BODY:\r");
        str.append(body + "\r");
        str.append("-----\r");
        str.append("EXTENDED BODY:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("EXCERPT:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("KEYWORDS:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("\r");
        str.append("--------\r");
        out.write(str.toString());
    }
}
めちゃくちゃスパゲッティですが、1回しか使わないのでこんなもんでしょうか。
<div class="msg">から
</div> <!-- //msg -->で1メッセージ。
package com.fc2.blog55.zennin.aaacafe;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.ListIterator;
import com.fc2.blog55.zennin.aaacafe.dao.GbCategIndexDAO;
import com.fc2.blog55.zennin.aaacafe.dao.GbMainDAO;
import com.fc2.blog55.zennin.aaacafe.dao._RootDAO;
/**
 * One-shot converter that reads every blog entry through {@code GbMainDAO}
 * and writes it to {@code aaacafe_blog.txt} as a text import record
 * (AUTHOR/TITLE/.../BODY sections separated by dashes), encoded as EUC-JP.
 */
public class Convert {
    /** Command-line entry point; takes no arguments. */
    public static void main(String[] args) {
        Convert conv = new Convert();
        conv.execute();
    }

    /**
     * Loads the category index, then writes one record per entry.
     *
     * <p>Any failure is reported with a stack trace and ends the conversion;
     * the output file is closed (and whatever was already buffered is flushed)
     * in every case.
     */
    void execute() {
        FileOutputStream os = null;
        BufferedWriter bw = null;
        try {
            _RootDAO.initialize();
            getCategs();
            File file = new File("aaacafe_blog.txt");
            os = new FileOutputStream(file);
            bw = new BufferedWriter(new OutputStreamWriter(os, "EUC-JP"));
            GbMainDAO mainDao = new GbMainDAO();
            List mainList = mainDao.findAll();
            SimpleDateFormat fmt = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
            for (ListIterator lite = mainList.listIterator(); lite.hasNext();) {
                GbMain mainData = (GbMain) lite.next();
                bw.write(formatEntry(mainData, fmt));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                // Closing the writer also closes the underlying stream; fall
                // back to the raw stream when the writer was never created.
                if (bw != null) {
                    bw.close();
                } else if (os != null) {
                    os.close();
                }
            } catch (java.io.IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Builds the import record for one entry. Lines are terminated with a
     * bare carriage return.
     *
     * @param mainData the entry to convert
     * @param fmt      formatter used for the DATE line
     * @return the complete record text
     */
    private String formatEntry(GbMain mainData, SimpleDateFormat fmt) {
        long id = mainData.getId();
        String title = mainData.getTitle();
        String category = getCateg(mainData.getCateg());
        String body = mainData.getBody();
        if (body == null) {
            body = "";
        }
        // The entry id is multiplied by 1000 and used as the entry date.
        String dateStr = fmt.format(new Date(id * 1000L));

        // Make every link open a new window, then expand the <file1>..<file9>
        // placeholders into image links.
        body = body.replaceAll("\">", "\" target=\"_blank\">");
        for (int i = 1; i <= 9; i++) {
            body = body.replaceAll("<file" + i + ">", convFile(id, i));
        }

        StringBuffer str = new StringBuffer();
        str.append("AUTHOR: Zennin\r");
        str.append("TITLE: " + title + "\r");
        str.append("STATUS: Publish\r");
        str.append("ALLOW COMMENTS: 1\r");
        str.append("CONVERT BREAKS: default\r");
        str.append("ALLOW PINGS: 1\r");
        str.append("PRIMARY CATEGORY: " + category + "\r");
        str.append("CATEGORY: " + category + "\r");
        str.append("\r");
        str.append("DATE: " + dateStr + "\r");
        str.append("-----\r");
        str.append("BODY:\r");
        // Entries that carry a named URL get it as a link in front of the body.
        if (mainData.getUrlName() != null
                && !mainData.getUrlName().equals("")) {
            str.append("<a href=\"" + mainData.getUrl()
                    + "\" target=\"_blank\">" + mainData.getUrlName()
                    + "</a>\r\r");
        }
        str.append(body + "\r");
        str.append("-----\r");
        str.append("EXTENDED BODY:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("EXCERPT:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("KEYWORDS:\r");
        str.append("\r");
        str.append("-----\r");
        str.append("\r");
        str.append("--------\r");
        return str.toString();
    }

    /** Category index rows, loaded once by {@link #getCategs()}. */
    List categIndexList = null;

    /** Loads all category index rows into {@link #categIndexList}. */
    void getCategs() {
        GbCategIndexDAO categIndexDao = new GbCategIndexDAO();
        categIndexList = categIndexDao.findAll();
    }

    /**
     * Looks up the display name of a category.
     *
     * @param id category id of an entry; may be {@code null}
     * @return the matching category name, or "Others" when the id is
     *         {@code null} or not present in the index
     */
    String getCateg(String id) {
        if (id == null) {
            return "Others";
        }
        for (ListIterator lite = categIndexList.listIterator(); lite.hasNext();) {
            GbCategIndex categ = (GbCategIndex) lite.next();
            if (id.equals(categ.getId())) {
                return categ.getCname();
            }
        }
        return "Others";
    }

    /**
     * Builds the HTML for attachment {@code i} of entry {@code id}: a
     * thumbnail image wrapped in a link to the same {@code <id>_<i>.jpg} file.
     *
     * @param id entry id
     * @param i  attachment number
     * @return the anchor/image HTML fragment
     */
    String convFile(long id, int i) {
        String url = "http://blog-imgs-18.fc2.com/z/e/n/zennin/"
                + id + "_" + i + ".jpg";
        return "<a href=\"" + url + "\" "
                + "target=\"_blank\"><img src=\"" + url + "\" "
                + "alt=\"\" border=\"0\"></a>";
    }
}
AUTHOR: Zennin
TITLE: インポートテスト
STATUS: Publish
ALLOW COMMENTS: 1
CONVERT BREAKS: default
ALLOW PINGS: 1
PRIMARY CATEGORY: Blog移行計画
CATEGORY: Blog移行計画
DATE: 08/05/2006 12:00:00
-----
BODY:
インポートテスト
-----
EXTENDED BODY:
-----
EXCERPT:
-----
KEYWORDS:
-----
--------
これを文字コードEUCで作成してインポートすればいいらしい。
Author:Zennin
Twitter: zenninblog
| 日 | 月 | 火 | 水 | 木 | 金 | 土 |
|---|---|---|---|---|---|---|
| - | - | 1 | 2 | 3 | 4 | 5 |
| 6 | 7 | 8 | 9 | 10 | 11 | 12 |
| 13 | 14 | 15 | 16 | 17 | 18 | 19 |
| 20 | 21 | 22 | 23 | 24 | 25 | 26 |
| 27 | 28 | 29 | 30 | 31 | - | - |