<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-31421954</id><updated>2012-01-20T22:41:07.442-08:00</updated><category term='capacity'/><category term='cache'/><category term='order by'/><category term='scaling'/><category term='insert'/><category term='presentation'/><category term='opensocial'/><category term='duplicate'/><category term='configuration'/><category term='ganglia'/><category term='lots of writes'/><category term='monitor'/><category term='slave'/><category term='maintenance'/><category term='filesize'/><category term='myspace'/><category term='review'/><category term='scripts'/><category term='mysqluc08'/><category term='Cassandra'/><category term='apache'/><category term='linux'/><category term='facebook'/><category term='left-most-prefix'/><category term='system'/><category term='master dual replication'/><category term='mysql'/><category term='group by'/><category term='php'/><category term='maatkit like'/><category term='federation'/><category term='2007'/><category term='memory'/><category term='INNODB'/><category term='book'/><category term='mongodb'/><category term='NoSQL'/><category term='mysqluc'/><category term='ndb'/><category term='MASTER_POS_WAIT'/><category term='internals'/><category term='sql'/><category term='shard'/><category term='server-id'/><category term='optimization'/><category term='packt'/><category term='index'/><category term='filesort'/><category term='throughput'/><category term='connect code'/><category term='fancy'/><category term='apc'/><category term='INNODB plugin'/><category term='replication'/><title type='text'>mySQL  DBA</title><subtitle type='html'>&lt;a 
href="http://www.linkedin.com/in/dathan"&gt;&lt;img src="http://www.linkedin.com/img/webpromo/btn_viewmy_160x25.gif" width="160" height="25" border="0" alt="View Dathan&amp;#39;s profile on LinkedIn"&gt;&lt;/a&gt;</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default?start-index=101&amp;max-results=100'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>147</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-31421954.post-4079809703544167243</id><published>2012-01-17T10:49:00.000-08:00</published><updated>2012-01-17T10:49:20.844-08:00</updated><title type='text'>mySQL Column Types and Why it Matters.</title><content type='html'>&lt;style&gt;&lt;!-- /* Font Definitions */@font-face {font-family:"Courier New"; panose-1:2 7 3 9 2 2 5 2 4 4; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;}@font-face {font-family:Times; panose-1:2 0 5 0 0 0 0 0 0 0; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;}@font-face {font-family:Cambria; 
panose-1:2 4 5 3 5 4 6 3 2 4; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;} /* Style Definitions */p.MsoNormal, li.MsoNormal, div.MsoNormal {mso-style-parent:""; margin-top:0in; margin-right:0in; margin-bottom:10.0pt; margin-left:0in; mso-pagination:widow-orphan; font-size:12.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Cambria; mso-ascii-theme-font:minor-latin; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Cambria; mso-hansi-theme-font:minor-latin; mso-bidi-font-family:"Times New Roman"; mso-bidi-theme-font:minor-bidi;}@page Section1 {size:8.5in 11.0in; margin:1.0in 1.25in 1.0in 1.25in; mso-header-margin:.5in; mso-footer-margin:.5in; mso-paper-source:0;}div.Section1 {page:Section1;}--&gt;&lt;/style&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;MySQL is awesome at converting strings to integers when comparing column lvalues with converted rvalues. So much so that many of us take this fact for granted. When does this assumption break down? When does passing in the wrong value cause problems in mySQL?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Let's take a table EmailLookup for example. 
&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;CREATE TABLE `EmailLookup` (&lt;br /&gt;&amp;nbsp; `userId` bigint(20) unsigned NOT NULL,&lt;br /&gt;&amp;nbsp; `email` varchar(128) NOT NULL,&lt;br /&gt;&amp;nbsp; `emailCrc32` int(11) unsigned NOT NULL,&lt;br /&gt;&amp;nbsp; `createDate` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,&lt;br /&gt;&amp;nbsp; PRIMARY KEY (`emailCrc32`,`userId`),&lt;br /&gt;&amp;nbsp; KEY `createDate` (`createDate`)&lt;br /&gt;) ENGINE=InnoDB DEFAULT CHARSET=latin1&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;The primary key is emailCrc32, userId for a key size of 12 bytes (4 bytes for int, 8 bytes for bigint). Since this is a compound key (key with two columns), following the rules of Left Most Prefix I get two index lookup types for one. emailCrc32 and userId is a unique index lookup while emailCrc32 is also an index lookup. Thus I can do&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;SELECT email FROM EmailLookup WHERE emailCrc32 = ? 
and userId =?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;OR&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;SELECT email FROM EmailLookup WHERE emailCrc32 = ?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;What happens if I pass emailCrc32 a numeric string. i.e.&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin-bottom: 0.0001pt;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;mysql&amp;gt; SELECT email FROM EmailLookup WHERE emailCrc32 = '1';&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Empty set (0.00 sec)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;So cool, works and comes back 
super quick.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;What happens if I pass emailCrc32 a real string. i.e.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt; mso-para-margin-bottom: .01gd; mso-para-margin-left: 0in; mso-para-margin-right: 0in; mso-para-margin-top: .01gd;"&gt;&lt;span style="font-family: Times; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;&lt;span style="font-size: small;"&gt;mysql&amp;gt; SELECT email FROM EmailLookup WHERE emailCrc32 = 'a';&lt;br /&gt;Empty set, 1 warning (0.00 sec)&lt;br /&gt;&lt;br /&gt;mysql&amp;gt; show warnings;&lt;/span&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;+---------+------+---------------------------------------------------------------+&lt;br /&gt;| Level&amp;nbsp;&amp;nbsp; | 
CodeMessage&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;|&lt;br /&gt;+---------+------+---------------------------------------------------------------+&lt;br /&gt;| Warning | 1366 | Incorrect integer value: 'a' for column 'emailCrc32' at row1 |&lt;br /&gt;+---------+------+---------------------------------------------------------------+&lt;br /&gt;1 row in set (0.00 sec)&lt;/span&gt;&lt;span style="font-family: Times; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Comes back quick.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;&lt;br /&gt;What happens if I pass the column a big int&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt; mso-para-margin-bottom: .01gd; mso-para-margin-left: 0in; mso-para-margin-right: 0in; mso-para-margin-top: .01gd;"&gt;&lt;span style="font-family: Times; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;&lt;span style="font-size: small;"&gt;&lt;br /&gt;mysql&amp;gt; SELECT email FROM EmailLookup WHERE 
emailCrc32 = 100003256490710;&lt;br /&gt;Empty set, 1 warning (0.00 sec)&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;mysql&amp;gt; show warnings;&lt;br /&gt;+---------+------+-----------------------------------------------------+&lt;br /&gt;| Level&amp;nbsp;&amp;nbsp; | Code |Message&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;|&lt;br /&gt;+---------+------+-----------------------------------------------------+&lt;br /&gt;| Warning | 1264 | Out of range value for column 'emailCrc32' at row 1 |&lt;br /&gt;+---------+------+-----------------------------------------------------+&lt;br /&gt;1 row in set (0.00 sec)&lt;/span&gt;&lt;span style="font-family: Times; font-size: 10.0pt; mso-bidi-font-family: &amp;quot;Times New Roman&amp;quot;;"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt; mso-para-margin-bottom: .01gd; mso-para-margin-left: 0in; mso-para-margin-right: 0in; mso-para-margin-top: .01gd;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Comes back quick. 
But, what if I do a DELETE on a 64-bit server?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;mysql&amp;gt; DELETE FROM EmailLookup WHERE emailCrc32 =&amp;nbsp; '100003256490710';&lt;/span&gt;&lt;/div&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace; font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&amp;nbsp;Query OK, 0 rows affected (2 min 14.16 sec)&lt;/span&gt;&lt;/div&gt;&lt;span style="font-family: &amp;quot;Courier New&amp;quot;,Courier,monospace; font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;WHAT? A 2 min query for&amp;nbsp; an impossible DELETE? 
Notice that '100003256490710' is a string.&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;What is happening to INNODB?&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;mySQL thread id 90620794, query id 3745441316 10.170.22.169 schoolfeed updating&lt;br /&gt;DELETE FROM EmailLookup WHERE emailCrc32 = '100003256490710'&lt;br /&gt;TABLE LOCK table `Shard1`.`EmailLookup` trx id 2AB524B6D lock mode IX&lt;br /&gt;RECORD LOCKS space id 160 page no 5626 n bits 248 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 7580 n bits 240 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 6039 n bits 280 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 455 n bits 352 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 3174 n bits 288 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 5997 n 
bits 304 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 1486 n bits 296 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 5607 n bits 280 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;RECORD LOCKS space id 160 page no 2729 n bits 312 index `PRIMARY` of table `Shard1`.`EmailLookup` trx id 2AB524B6D lock_mode X locks rec but not gap&lt;br /&gt;TOO MANY LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Yikes this is bad.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;/span&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;Is this a bug? Maybe, but it is also a condition that should not happen if types are respected. 
The moral of the story is if your application respects column types mySQL will respect you :)&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: Times;"&gt;This is from Server version: 5.1.57-rel12.8-log Percona Server (GPL), 12.8, Revision 233&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt; &lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4079809703544167243?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4079809703544167243/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4079809703544167243' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4079809703544167243'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4079809703544167243'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2012/01/mysql-column-types-and-why-it-matters.html' title='mySQL Column Types and Why it Matters.'/><author><name>Dathan 
Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4564387788193304981</id><published>2011-12-20T11:29:00.000-08:00</published><updated>2011-12-20T11:47:16.919-08:00</updated><title type='text'>The Effect of using Cloudfront and why it matters</title><content type='html'>For years (12+) I have been building systems on every tier of the web. Everything from low-level OS optimizations, mySQL internals,&amp;nbsp;interpreted&amp;nbsp;language performance tricks to static content optimization.&lt;br /&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Building CDNs is easy, but what makes Akamai or Cloudfront attractive is the presences, known as edge nodes, they have around the world to syndicate your content closest to the&amp;nbsp;requester.&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Their smart DNS servers send people to the closest edge node to serve content. This is great for serving Javascript, Images, CSS (Video) because it's static.&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Here is a good example. Your system can serve content in less than 5ms if the network is not involved. With the network overhead that content is served in 10ms time if you are close to the DC (say about 1200 miles). Yet your users on the east coast (assuming your dc is in the west coast) or better yet your users in Europe see this content in 355 ms. Around 300 ms or so users start to notice the lag; as a result this lag is proportional to an increase in the chances that the user will bounce. People hate waiting. 
Now do you optimize the backend to serve the content faster or do you put the content closer to the end user?&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Put the content closer to the end user to reduce the 350ms back down to 10ms. This is what cloudfront - an Amazon product - does for you. Here is a good &lt;a href="http://trac.cyberduck.ch/wiki/help/en/howto/cloudfront"&gt;wiki page&lt;/a&gt; to set up cloudfront. I've expanded on this to add some Apache Mod Rewrite rules to automate cache&amp;nbsp;invalidation&amp;nbsp;so I don't have to call an API to purge the CDN cache. Below is the mod rewrite rule&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;pre&gt;&lt;/pre&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;&amp;nbsp; &amp;nbsp; RewriteRule ^/static/(\d+)/(.*)? /static/$2 [NC,QSA,L]&lt;/div&gt;&lt;div&gt;&amp;nbsp; &amp;nbsp; RewriteRule ^/static/(\w+)/(\d+)/(.*)? /static/$1/$3 [NC,QSA,L]&lt;/div&gt;&lt;div&gt;&amp;nbsp; &amp;nbsp; RewriteRule ^/static/(\w+)/(\S+)/(\d+)/(.*)? /static/$1/$2/$4 [NC,QSA,L]&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;what this says is&amp;nbsp;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;given a url&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;http://domain/static/12345/main.js&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;serve from&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;http://domain/static/main.js&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;The dynamic url is generated by taking the abs crc32 of the file contents of each file. So if a file changes on disk so does the url, breaking the cache and forcing cloudfront to refetch the content to display to the user. 
All this is calculated once during the deploy process.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;For instance&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;http://d1wuzpn2rb4qzi.cloudfront.net/static/jquery/240184024/jquery.min.js&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;http://d1wuzpn2rb4qzi.cloudfront.net/static/jquery maps to http://your.schoolfeed.com/static/jquery&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;240184024 - is the cache breaker by doing this during the deploy process&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;$hashes[$file] = abs(crc32(file_get_contents($file)));&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;then generating php code that is global to the templates&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;$str = "";&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Now when building the reference link&lt;/div&gt;&lt;div&gt;&lt;br /&gt;script type="text/javascript" src="{cloudfront file='/static/jquery/jquery.min.js'}"&lt;/div&gt;&lt;div&gt;&lt;script src="{cloudfront file='/static/jquery/jquery-1.6.2.min.js'}" type="text/javascript"&gt;&lt;/script&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;{cloudfront} is a smarty function that takes the input file and splits the directory putting&amp;nbsp;240184024&lt;/div&gt;&lt;div&gt;into the path for the cloudfront url&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;So what has this done on the system.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-EBr8pZytgvc/TvDcLs9yKMI/AAAAAAAAAFk/swnAoWU7ZKk/s1600/www_total_accesses.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="163" 
src="http://4.bp.blogspot.com/-EBr8pZytgvc/TvDcLs9yKMI/AAAAAAAAAFk/swnAoWU7ZKk/s320/www_total_accesses.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Around the 5pm hour we see a drop in www_accesses, that's due to switching to Cloudfront. There is nearly 60% in savings. schoolFeed has a lot of javascript files. Additionally there is still another optimization that can be done to group javascript files together to reduce the number of GETs.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-hFuG6kVNQ2M/TvDd_CeBEPI/AAAAAAAAAFs/z7Lmn9NKMBY/s1600/www_Bytes.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="163" src="http://3.bp.blogspot.com/-hFuG6kVNQ2M/TvDd_CeBEPI/AAAAAAAAAFs/z7Lmn9NKMBY/s320/www_Bytes.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Here we see a 35% drop in bytes out as a result of the change.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;And the effect on mySQL is about 3-5% more traffic on the backend as users stick around longer since things are snappier.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-i9U2UEEWYEg/TvDgdwtYb2I/AAAAAAAAAF0/c0DaTjWWihM/s1600/selects.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="125" src="http://4.bp.blogspot.com/-i9U2UEEWYEg/TvDgdwtYb2I/AAAAAAAAAF0/c0DaTjWWihM/s320/selects.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Always keep this in mind. 
As one tier becomes faster or more performant the other tiers should have the capacity to keep up with demand.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4564387788193304981?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4564387788193304981/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4564387788193304981' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4564387788193304981'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4564387788193304981'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/12/effect-of-using-cloudfront-and-why-it.html' title='The Effect of using Cloudfront and why it matters'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-EBr8pZytgvc/TvDcLs9yKMI/AAAAAAAAAFk/swnAoWU7ZKk/s72-c/www_total_accesses.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1432095340838741710</id><published>2011-11-22T12:55:00.001-08:00</published><updated>2011-11-22T16:19:48.626-08:00</updated><title type='text'>Using live code interrupts to produce stats which in turn improves code</title><content type='html'>How do you know that your 
code is fast? Is it fast for your test cases or is it fast for every case? When changes are made how does that affect your customers? How do you know over a period of time if the system is faster or slower?&lt;br /&gt;&lt;br /&gt;The same stat system which is used to track new installs, viral clicks, impressions, rates,&amp;nbsp; funnels, page flows, gauges, counters, etc is also used to let me know how fast code blocks are performing. How is this done?&lt;br /&gt;&lt;br /&gt;Since a Front Controller Design Pattern is used for my AJAX calls, I am able to wrap all calls in time deltas to produce a centralized stat on how long all service calls are taking. So, for each code change that is pushed, I can see if that code change slowed something down, or broke something altogether.&lt;br /&gt;&lt;br /&gt;Here is the setup. Each Service Call Response Time is placed into buckets; less than 200ms, 201ms to 500ms, 501ms to 1 second, 1.001 second to 2 seconds and greater than 2 seconds. Anything over 500 ms is bad.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-9NTzL4JT1dI/TswgWqptOlI/AAAAAAAAAEw/GYRQfqoIF9k/s1600/service-Time-Feed+copy.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="253" src="http://1.bp.blogspot.com/-9NTzL4JT1dI/TswgWqptOlI/AAAAAAAAAEw/GYRQfqoIF9k/s320/service-Time-Feed+copy.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;On the 13th of November I see that service calls for the 2 second+ bucket are on the rise. Now the next step is to look at independent data sources to determine whether this is a system issue or a code issue. This is the tricky part because code issues can cause system issues. 
I use ganglia to separate out the system stats from code interrupt stats and interpret the results.&lt;br /&gt;&amp;nbsp; &lt;br /&gt;Now the System stats I look at to help me drill down to the issues are things that&amp;nbsp;&lt;style&gt;&lt;!-- /* Font Definitions */@font-face {font-family:Times; panose-1:2 0 5 0 0 0 0 0 0 0; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;}@font-face {font-family:Cambria; panose-1:2 4 5 3 5 4 6 3 2 4; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;} /* Style Definitions */p.MsoNormal, li.MsoNormal, div.MsoNormal {mso-style-parent:""; margin-top:0in; margin-right:0in; margin-bottom:10.0pt; margin-left:0in; mso-pagination:widow-orphan; font-size:12.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Cambria; mso-ascii-theme-font:minor-latin; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Cambria; mso-hansi-theme-font:minor-latin; mso-bidi-font-family:"Times New Roman"; mso-bidi-theme-font:minor-bidi;}@page Section1 {size:8.5in 11.0in; margin:1.0in 1.25in 1.0in 1.25in; mso-header-margin:.5in; mso-footer-margin:.5in; mso-paper-source:0;}div.Section1 {page:Section1;}--&gt;&lt;/style&gt;&lt;span style="font-family: Times; font-size: 10pt;"&gt;encapsulate &lt;/span&gt; the problem that I am investigating.&amp;nbsp; For instance, service calls are a product of CPU and Memory usage from the www teir. These system calls use network resources that talk to databases, memcache and queues. Thus I ask myself what are some high level system metrics to help me figure out if there is a system issue? Well apache has a lot of high level stats like busy workers and requests per second. Busy workers are a product of memory, cpu and network resources. 
This is a good stat.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-yOLJb6KzsPU/TswzbS0VbEI/AAAAAAAAAFQ/Ag3gTVWb97M/s1600/www_Busy.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="139" src="http://2.bp.blogspot.com/-yOLJb6KzsPU/TswzbS0VbEI/AAAAAAAAAFQ/Ag3gTVWb97M/s320/www_Busy.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Here busy workers are seen to be increasing but not all that much. What is the slow down? Let's look at another ganglia stat - Requests per Second.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-weZl-fZzXF8/Tswzo71O6OI/AAAAAAAAAFY/cEOKrp0hj7Q/s1600/www-ReqPerSec+copy.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="139" src="http://1.bp.blogspot.com/-weZl-fZzXF8/Tswzo71O6OI/AAAAAAAAAFY/cEOKrp0hj7Q/s320/www-ReqPerSec+copy.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Wow the request per second is going up, Busy (above) is growing slightly; with hiccups - thus what I need to look at is the data tier since the wwws are scaling well (Higher request rates with the same www busy rate).&lt;br /&gt;&lt;br /&gt;Unfortunately the graphs for that are plagued with huge spikes caused by long term issues in RRD, so they are not show-able. A long story short, an increase in requests are from a sudden spike of new users and returning users; causing more concurrency on the backend exposing memcache evictions because of more active users. 
Thus I added more memcache servers and scheduled a rebalance of database data onto new servers; this should lower overall the service times on average without code changes.&lt;br /&gt;&lt;br /&gt;In summary, realtime code insights are just as valuable as knowing the number of installs or clicks from emails. You can do all sort of stuff like draw custom dots for code deployments that correlate with response times. Having this data is invaluable to keep your site fast at very little cost while giving you knowledge of the entire system.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1432095340838741710?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1432095340838741710/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1432095340838741710' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1432095340838741710'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1432095340838741710'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/11/using-live-code-interrupts-to-produce.html' title='Using live code interrupts to produce stats which in turn improves code'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-9NTzL4JT1dI/TswgWqptOlI/AAAAAAAAAEw/GYRQfqoIF9k/s72-c/service-Time-Feed+copy.png' 
height='72' width='72'/><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-275283870029642203</id><published>2011-10-25T12:38:00.000-07:00</published><updated>2011-10-25T12:38:10.993-07:00</updated><title type='text'>Handling the Hockey Stick Growth</title><content type='html'>&lt;a href="http://1.bp.blogspot.com/-NuqikPsWxhI/Tqbp4mnn56I/AAAAAAAAAEY/lkap8aaKfHo/s1600/schoolfeed.com+1+092+629+UVs+for+September+2011+++Compete.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="188" src="http://1.bp.blogspot.com/-NuqikPsWxhI/Tqbp4mnn56I/AAAAAAAAAEY/lkap8aaKfHo/s320/schoolfeed.com+1+092+629+UVs+for+September+2011+++Compete.png" width="320" /&gt;&lt;/a&gt;The term hockey stick is used to describe the effect of an app that suddenly goes viral. Take a look at the graph to the left. There is modest growth and suddenly, the app goes viral and takes off. It looks like a hockey stick, ala the term.&lt;br /&gt;&lt;br /&gt;This article is briefly going to touch on the points of how to handle the sudden growth at the lowest cost possible with a site that I helped build: &lt;a href="http://your.schoolfeed.com/perm.php"&gt;schoolFeed.com&lt;/a&gt;-a social network that reconnects classmates for Free.&lt;br /&gt;&lt;br /&gt;The main features of &lt;a href="http://your.schoolfeed.com/perm.php"&gt;schoolfeed.com&lt;/a&gt; is reconnecting classmates, ensuring that each classmate is well connected, a feed to keep classmates in touch with one another and interests that each classmate share. 
Additionally there is a photo experience to share with your online yearbook and more features to come.&lt;br /&gt;&lt;br /&gt;To handle the growth, enable rapid feature&amp;nbsp;development, keep the site up without waking me up, and keeping it cheap means a set of structure needs to be put in place.&lt;br /&gt;&lt;br /&gt;Suggestion #1: Keep the system architecture simple&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;The architecture consists of PHP on the front end, &lt;a href="http://www.google.com/search?q=site%3Amysqldba.blogspot.com+memcache"&gt;Memcache&lt;/a&gt; to front database queries, a database on the backend, a queue service-&lt;a href="http://www.google.com/search?gcx=c&amp;amp;ix=c1&amp;amp;sourceid=chrome&amp;amp;ie=UTF-8&amp;amp;q=site%3Amysqldba.blogspot.com+gearman"&gt;Gearman&lt;/a&gt; to handle offline processing in&amp;nbsp;parallel; finally &lt;a href="http://www.google.com/search?q=site%3Amysqldba.blogspot.com+sendgrid"&gt;sendgrid&lt;/a&gt; to handle mail.&lt;/blockquote&gt;Suggestion #2: Keep the development environment simple&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;The development environment did not start off too abstracted. A simple MVC model is used where the Model fronts the PDO database objects structure. The Controller is the service layer which is a &lt;a href="http://en.wikipedia.org/wiki/Front_Controller_pattern"&gt;Front Controller design pattern&lt;/a&gt;, and the php entry points to handle the model inputs. The View is in smarty because keeping the presentation layer&amp;nbsp;separate&amp;nbsp;from the&amp;nbsp;business&amp;nbsp;logic is&amp;nbsp;pivotal. Additionally this View is&amp;nbsp;separated&amp;nbsp;enough to replace smarty and or internationalize the strings in the future. 
Also JQUERY is used to make life so much simpler when supporting IE.&lt;/blockquote&gt;Suggestion #3: Monitor everything&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;I use Nagios for alerting (Icinga), Ganglia for Trending, and a custom stat system backed by mySQL for reporting on code&amp;nbsp;interrupts, click through rates, feature adoption, K-Factor, &amp;nbsp;DAU per feature, MAU, WAU, Facebook Platform Health, site response time, site api response time, email send rate.&lt;/blockquote&gt;Suggestion #4: Design every layer to be distributed.&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;If I run out of apache threads, I add more www servers. If my memcache eviction rate is too high, I add more memcache servers. If I need more database transactions per second I add more database servers and each layer is controlled from a config file enabling rapid deployment of servers to handle spikes in traffic. Since the database connection logic is controlled by the application, I drop a definition in an array and new traffic starts hitting a new database server. If the existing database server is loaded too much and I need to move data off of it, I take an xtrabackup of the server, replicate it to a new server, set the pointer for a % of that traffic to the new server and clear up the old data on the original server. Or I can migrate individual entities. An entity is a user/school/interest/url/facebook id/etc.&lt;/blockquote&gt;&lt;br /&gt;Suggestion #5: Don't optimize too soon.&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;The goal is to make each feature super fast, but building a super abstracted layer to support 1000s of devs is only&amp;nbsp;necessary&amp;nbsp;when you have 10s of devs :). Please don't interpret this as me advocating being sloppy-I'm saying it's cool to allow your team to interact with SQL and write their own :). 
Additionally building custom servers to handle specific tasks, changing languages to get a specific feature is really not&amp;nbsp;necessary&amp;nbsp;in the&amp;nbsp;beginning. Supporting the product and&amp;nbsp;ensuring&amp;nbsp;the features do not take more than 200ms to generate or weeks to build said feature. This should be the focus to enable the hockey stick. In the early stage of a hockey stick; technology rarely is the cause for the growth-it's building what your users may want and when you're wrong throw that stuff away and actually build what they want. A helpful tool is to build features in a way where the feature or parts of the feature can be turned off with a config change. This will save you a ton of headache without having to take the site down, while enabling pushing code out quickly and watching to see if it's being adopted prior to optimizing.&lt;/blockquote&gt;&lt;br /&gt;Suggestion #6: Plan for things to break and set up procedures to handle outages&lt;br /&gt;&lt;blockquote class="tr_bq"&gt;Things will break. The goal is to hide this fact from users or&amp;nbsp;inconvenience&amp;nbsp;them as little as possible. Schedule&amp;nbsp;maintenance&amp;nbsp;windows to fix the heavy stuff. Have a playbook to handle outages, if the play does not exist-write the play down. Finally automate common tasks. Remember if you don't want any user experiencing an outage-that costs a lot of money. Redundancy is expensive. Multiple Redundancy in multiple datacenters is even more expensive.&lt;/blockquote&gt;I hope these steps help you in your projects in the future. I have had the pleasure of handling multiple hockey sticks and following a basic rule/suggestion set has helped me each time. 
The end goal really is to give a great experience for your users, build a clean environment for your devs with your devs input and improve the product rapidly.&lt;br /&gt;&lt;br /&gt;Some stats: 3-5 web servers, 2 job boxes 2 database servers we are able to handle well over 100K DAU.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-275283870029642203?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/275283870029642203/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=275283870029642203' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/275283870029642203'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/275283870029642203'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/10/handling-hockey-stick-growth.html' title='Handling the Hockey Stick Growth'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-NuqikPsWxhI/Tqbp4mnn56I/AAAAAAAAAEY/lkap8aaKfHo/s72-c/schoolfeed.com+1+092+629+UVs+for+September+2011+++Compete.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5383519901113033521</id><published>2011-10-20T18:17:00.000-07:00</published><updated>2011-10-21T11:16:49.091-07:00</updated><title type='text'>Facebook should launch 
FBCloud and compete directly with Amazon, Google and others</title><content type='html'>&lt;style&gt;&lt;!-- /* Font Definitions */@font-face {font-family:Times; panose-1:2 0 5 0 0 0 0 0 0 0; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;}@font-face {font-family:Cambria; panose-1:2 4 5 3 5 4 6 3 2 4; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;} /* Style Definitions */p.MsoNormal, li.MsoNormal, div.MsoNormal {mso-style-parent:""; margin-top:0in; margin-right:0in; margin-bottom:10.0pt; margin-left:0in; mso-pagination:widow-orphan; font-size:12.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Cambria; mso-ascii-theme-font:minor-latin; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Cambria; mso-hansi-theme-font:minor-latin; mso-bidi-font-family:"Times New Roman"; mso-bidi-theme-font:minor-bidi;}p {margin:0in; margin-bottom:.0001pt; mso-pagination:widow-orphan; font-size:10.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Times; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Times; mso-bidi-font-family:"Times New Roman";}@page Section1 {size:8.5in 11.0in; margin:1.0in 1.25in 1.0in 1.25in; mso-header-margin:.5in; mso-footer-margin:.5in; mso-paper-source:0;}div.Section1 {page:Section1;}--&gt;&lt;/style&gt;&lt;br /&gt;&lt;style&gt;&lt;!-- /* Font Definitions */@font-face {font-family:Times; panose-1:2 0 5 0 0 0 0 0 0 0; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;}@font-face {font-family:Cambria; panose-1:2 4 5 3 5 4 6 3 2 4; mso-font-charset:0; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 0 0 0 1 0;} /* Style Definitions */p.MsoNormal, li.MsoNormal, div.MsoNormal {mso-style-parent:""; margin-top:0in; margin-right:0in; margin-bottom:10.0pt; margin-left:0in; 
mso-pagination:widow-orphan; font-size:12.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Cambria; mso-ascii-theme-font:minor-latin; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Cambria; mso-hansi-theme-font:minor-latin; mso-bidi-font-family:"Times New Roman"; mso-bidi-theme-font:minor-bidi;}a:link, span.MsoHyperlink {color:blue; text-decoration:underline; text-underline:single;}a:visited, span.MsoHyperlinkFollowed {mso-style-noshow:yes; color:purple; text-decoration:underline; text-underline:single;}p {margin:0in; margin-bottom:.0001pt; mso-pagination:widow-orphan; font-size:10.0pt; font-family:"Times New Roman"; mso-ascii-font-family:Times; mso-fareast-font-family:Cambria; mso-fareast-theme-font:minor-latin; mso-hansi-font-family:Times; mso-bidi-font-family:"Times New Roman";}@page Section1 {size:8.5in 11.0in; margin:1.0in 1.25in 1.0in 1.25in; mso-header-margin:.5in; mso-footer-margin:.5in; mso-paper-source:0;}div.Section1 {page:Section1;}--&gt;&lt;/style&gt;&lt;br /&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;AnotherFacebook should do post by someone outside of&amp;nbsp;Facebook; but it’s a moneymakerthat Facebook has not tried and probably has the best chance of succeeding at(not like deals har har - jab, jab). Some of the best DEV Ops work at Facebook.Facebook knows scale. Facebook knows system management. Facebook built the mostadvance data-center on the planet. This stuff is great it shows that they can doit, but what’s the motivation for the app developer to deploy in a FacebookCloud? Simply put Latency. This is the real issue, for me, really a selfish desire&amp;nbsp;tohave my app move as fast as Facebook's Apps while using Facebook Data; a seamless integration if you will.&amp;nbsp;For Facebook, it’s good because they can help me make my users happier whilemaking tons of Cash. 
If my app is in the same data-center as the center of data, my app can move faster thus giving my users a better experience.&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;Here is an example. When doing a graph call for Facebook friends, the backend systems can do it in ms time yet the JSON reaches the caller in the 100ms timeframe over the WAN from my servers in EC2-west1c to Facebook Servers in Oregon. If I'm in the data-center that holds the data (&lt;a href="http://www.facebook.com/prinevilledatacenter"&gt;Oregon&lt;/a&gt;) my app speeds up 10 times, since that 100ms R(t) turns to 5-10ms.&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;Additionally Facebook houses some of the most advanced tech that lots of people around the web use. Such as MEMCACHE. Facebook could manage that for you. In fact they have PETA BYTES of memory for their own app with automatic key management between DCs (wow). Offer Facebook Hosted MySQL with Flash Cache for High density IOPS. Each Facebook DB server has Solid State Disks, use that to buffer IOPS for subscribed developers to FBCloud. With their tools to automatically migrate data to another server, building new Instances would be a snap without having to use a SAN. Although you could use SSDs to buffer SAN writes/reads for easier management with great R(t). Facebook stats on HBASE, Facebook Varnish, a fast CDN-everything that they do as a commercial product. I've seen their tools, it's better than enterprise quality and nearly all of them have an API. Facebook Culture is platform focused. 
I assume if you don't build an API for your tool you're mocked.&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;How would Facebook make money? Charge on CPU resources just like Amazon. Charge on IOPS, charge on managed Memcache size, charge on Data size. Charge for BCP. With this adding to Facebook's platform, Facebook could make money on the Front End from Ads, on currency and finally on the API indirectly while giving the End User an entire platform guaranteed to be fast and redundant in multiple data-centers.&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: .1pt; margin-left: 0in; margin-right: 0in; margin-top: .1pt;"&gt;I have an entire vision that would make a ton of cash, but really would make my users and me as a developer happier.&lt;br /&gt;&lt;br /&gt;PS This post went out too fast, with grammar and spelling mistakes. Should be fixed now, my apologies. 
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5383519901113033521?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5383519901113033521/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5383519901113033521' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5383519901113033521'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5383519901113033521'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/10/facebook-should-launch-fbcloud-and.html' title='Facebook should launch FBCloud and compete directly with Amazon, Google and others'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5549811965188461717</id><published>2011-09-21T10:58:00.000-07:00</published><updated>2011-09-30T08:51:10.451-07:00</updated><title type='text'>Stump the Murph: ulimit, pam and linux</title><content type='html'>There is a game that a small group of friends and I have been playing since my Friendster years. It's called Stump the Murph. Basically if there is some weird problem in Linux mainly but it's in a variety of subjects-that we can't figure out we pass it to one of our friends Kevin Murphy. 
In 8 years I believe I stumped him once but I can't remember what it is so it doesn't count.&lt;br /&gt;&lt;br /&gt;Here is the problem&lt;br /&gt;&lt;br /&gt;&lt;div style="background: none repeat scroll 0 0 #F5F5F5; border: 1px solid #E5E5E5;"&gt;&lt;pre&gt;SQLSTATE[HY000] [1135] Can't create a new thread (errno 11); if you are not out of available memory, you can consult the manual for a possible OS-dependent bug&lt;br /&gt;&lt;/pre&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style="font-family: arial, sans-serif; font-size: x-small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;"Obviously" this means you need to raise the ulimit for the process running mysql. I say "obviously" because this error means different things. In most cases it means that the server ran out of memory. perror 11 says&amp;nbsp;OS error code &amp;nbsp;11: &amp;nbsp;Resource temporarily unavailable, yet when there is enough memory there may be a pam_limit issue. 
In my case there is.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;So I did the following&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;in /etc/security/limits.conf I added this&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: white; font-family: arial, sans-serif; font-size: 13px;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;div style="background: none repeat scroll 0 0 #F5F5F5; border: 1px solid #E5E5E5;"&gt;&lt;pre&gt;mysql   soft    nofile  10240&lt;br /&gt;mysql   hard    nofile  1537454&lt;br /&gt;mysql   soft    nproc   32768&lt;br /&gt;mysql   hard    nproc   65535&lt;br /&gt;&lt;/pre&gt;&lt;/div&gt;&lt;br /&gt;yet when I test the changes su - mysql &lt;br /&gt;I get&lt;br /&gt;&amp;nbsp; &lt;br /&gt;&lt;br /&gt;&lt;div style="background: none repeat scroll 0 0 #F5F5F5; border: 1px solid #E5E5E5;"&gt;su: pam_limits(su-l:session): Could not set limit for 'nofile': Operation not permitted&lt;/div&gt;&lt;br /&gt;So my next course of action is to check&lt;br /&gt;&lt;br /&gt;/etc/pam.d/system-auth &lt;br /&gt;&lt;br /&gt;wait a second it has &lt;br /&gt;&lt;br /&gt;&lt;div style="background: none repeat scroll 0 0 #F5F5F5; border: 1px solid #E5E5E5;"&gt;session     required      pam_limits.so&lt;/div&gt;and &lt;br /&gt;&lt;pre&gt;&lt;/pre&gt;&lt;pre&gt;/etc/pam.d/su calls&lt;br /&gt;&lt;br /&gt;session         include         system-auth&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;thus I don't need to addsession     required      
pam_limits.so&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Now the game of Stump the Murph begins:&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;In about 1/2 hour Murph figured out the solution! He deduced that since&amp;nbsp;&lt;br /&gt;&lt;br /&gt;cat /proc/sys/fs/file-max&lt;br /&gt;1537454&lt;br /&gt;&lt;br /&gt;you can't set the hard limit of nofile to 1537454 because in theory you could starve the kernel from file descriptorsthus from murph's suggestion I did &lt;br /&gt;&lt;br /&gt;&lt;div style="background: none repeat scroll 0 0 #F5F5F5; border: 1px solid #E5E5E5;"&gt;&lt;pre&gt;mysql      soft    nofile  10240&lt;/pre&gt;&lt;pre&gt;mysql      hard    nofile  768727&lt;br /&gt;mysql      soft    nproc   32768&lt;br /&gt;mysql      hard    nproc   65535&lt;br /&gt;&lt;/pre&gt;&lt;/div&gt;&lt;br /&gt;Thanks Murph!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5549811965188461717?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5549811965188461717/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5549811965188461717' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5549811965188461717'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5549811965188461717'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/09/stump-murph-ulimit-pam-and-linux.html' title='Stump the Murph: ulimit, pam and linux'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-286449648217920976</id><published>2011-09-15T10:19:00.000-07:00</published><updated>2011-12-21T12:48:55.600-08:00</updated><title type='text'>Amazon EBS mySQL, Disk Throughput and the Dual Edge of Software Raid</title><content type='html'>Amazon's EBS system is just a nice interface to a SAN subsystem, which manages the attachments of SAN LUNs. The problem with SAN when compared to Local SAS drives is latency and the shared controller, which caches IOPS for very distinct load profiles. Each load profile has an "optimized" cache profile from the SAN's redundant controller system. You may be able to attach petabytes of disks, but this system cannot utilize the true throughput when compared to small locally attached SAS Drives. Now the management portion of awesome. I love having the ability to mount more disk but I rarely need more space, I need speed.&lt;br /&gt;&lt;br /&gt;How to get Speed out of Amazon's EBS volumes: Software RAID it!&lt;br /&gt;&lt;div style="background: #D3D3D3; border: 1px solid #000;"&gt;&lt;pre style="margin-left: 10px;"&gt;mdadm --create /dev/md1 -v --raid-devices=8 --chunk=256 --level=raid10 /dev/xvdk /dev/xvdl /dev/xvdm /dev/xvdn /dev/xvdo /dev/xvdp /dev/xvdq /dev/xvdr&lt;/pre&gt;&lt;/div&gt;&lt;br /&gt;Take 8 EBS 125 GB volumes create a raid10 array with a 256KB chunk size. After various and mind numbing benchmarks I found that 256K is a good sweet spot. Feel free to do your own benches. The results have to be interpreted because of the nature of using a shared resource.&lt;br /&gt;&lt;br /&gt;What I end up with is a 500GB partition, and I am roughly able to get around 22-25 MB of second of random I/O from 20 threads. 
To compare this to an 8 DISK 15K RPM PERC-6 2.5" SAS system I am able to get around 44 MB of second at a constant 1-2 ms response time for the same physical space. EBS volumes Response time per iop range from 6ms to 200ms. This sucks. Note: these numbers are based on RANDOM I/O 16KB Page size (4 iops per block write), what INNODB uses not sequential I/O.&lt;br /&gt;&lt;br /&gt;Here is some iostat numbers from a live box with this configuration&lt;br /&gt;&lt;pre&gt;avg-cpu:  %user   %nice %system %iowait  %steal   %idle&lt;br /&gt;           1.83    0.00    1.75   22.32    0.08   74.01&lt;br /&gt;&lt;br /&gt;Device:         rrqm/s   wrqm/s     r/s     w/s   rsec/s   wsec/s avgrq-sz avgqu-sz   await  svctm  %util&lt;br /&gt;xvdap1            0.00     0.00    0.00    0.00     0.00     0.00     0.00     0.00    0.00   0.00   0.00&lt;br /&gt;xvdh              0.00     0.00    0.00    1.00     0.00     8.00     8.00     0.00    0.00   0.00   0.00&lt;br /&gt;xvdk              0.00     0.00   34.40   26.40  1100.80  1503.20    42.83     0.49    8.01   6.39  38.88&lt;br /&gt;xvdl              0.00     0.00   13.20   26.40   422.40  1503.20    48.63     0.27    6.71   4.38  17.36&lt;br /&gt;xvdm              0.00     0.20   32.40   27.00  1036.80  1524.20    43.11     0.30    5.13   4.19  24.88&lt;br /&gt;xvdn              0.00     0.20    9.40   27.00   300.80  1524.20    50.14     0.15    4.11   2.48   9.04&lt;br /&gt;xvdo              0.00     0.00   30.20   27.40   968.00  1496.80    42.79     0.45    7.76   6.56  37.76&lt;br /&gt;xvdp              0.00     0.00   14.60   27.40   478.40  1496.80    47.03     0.22    5.26   3.92  16.48&lt;br /&gt;xvdq              0.00     0.00   31.20   25.60   998.40  1501.60    44.01     0.38    6.73   5.32  30.24&lt;br /&gt;xvdr              0.00     0.00    9.80   25.60   313.60  1501.60    51.28     0.16    4.50   2.35   8.32&lt;br /&gt;md1               0.00     0.00  174.80   98.60  5606.40  6009.80    42.49     0.00    0.00   
0.00   0.00&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;So, now that I have acceptable speed what is the drawback? A weekly cron job that runs a check across the raid array. On Amazon’s EBS system it cuts my throughput in 1/2&lt;br /&gt;&lt;br /&gt;For my Amazon Linux system the cron job is located&lt;br /&gt;&lt;div style="background: #D3D3D3; border: 1px solid #000;"&gt;&lt;pre style="margin-left: 10px;"&gt;-rwxr-xr-x 1 root root 2770 Jan 16 &amp;nbsp;2011 /etc/cron.weekly/99-raid-check&lt;br /&gt;&lt;/pre&gt;&lt;/div&gt;&lt;br /&gt;It essentially runs&lt;br /&gt;&lt;br /&gt;echo check &amp;gt; /sys/block/md1/md/sync_action&lt;br /&gt;&lt;br /&gt;Yet, the check lasts for around 9000 min or 6.25 days! Thus I will only have .75 days of full throughput.&lt;br /&gt;&lt;br /&gt;So to stop this I must run&lt;br /&gt;echo idle &amp;gt; /sys/block/md1/md/sync_action&lt;br /&gt;&lt;br /&gt;I do not recommend turning off the check, its needed. Now to find out a way to make this check happen faster.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-286449648217920976?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/286449648217920976/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=286449648217920976' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/286449648217920976'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/286449648217920976'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/09/amazon-ebs-mysql-disk-throughput-and.html' title='Amazon EBS mySQL, Disk Throughput and the Dual Edge of Software Raid'/><author><name>Dathan 
Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6642230009896161476</id><published>2011-09-07T18:47:00.000-07:00</published><updated>2011-09-07T18:47:51.602-07:00</updated><title type='text'>Virtual Currency and Tracking What is Given Where</title><content type='html'>&lt;style&gt;&lt;!-- /* Font Definitions */@font-face	{font-family:Times;	panose-1:2 0 5 0 0 0 0 0 0 0;	mso-font-charset:0;	mso-generic-font-family:auto;	mso-font-pitch:variable;	mso-font-signature:3 0 0 0 1 0;}@font-face	{font-family:Cambria;	panose-1:2 4 5 3 5 4 6 3 2 4;	mso-font-charset:0;	mso-generic-font-family:auto;	mso-font-pitch:variable;	mso-font-signature:3 0 0 0 1 0;} /* Style Definitions */p.MsoNormal, li.MsoNormal, div.MsoNormal	{mso-style-parent:"";	margin-top:0in;	margin-right:0in;	margin-bottom:10.0pt;	margin-left:0in;	mso-pagination:widow-orphan;	font-size:12.0pt;	font-family:"Times New Roman";	mso-ascii-font-family:Cambria;	mso-ascii-theme-font:minor-latin;	mso-fareast-font-family:Cambria;	mso-fareast-theme-font:minor-latin;	mso-hansi-font-family:Cambria;	mso-hansi-theme-font:minor-latin;	mso-bidi-font-family:"Times New Roman";	mso-bidi-theme-font:minor-bidi;}p	{margin:0in;	margin-bottom:.0001pt;	mso-pagination:widow-orphan;	font-size:10.0pt;	font-family:"Times New Roman";	mso-ascii-font-family:Times;	mso-fareast-font-family:Cambria;	mso-fareast-theme-font:minor-latin;	mso-hansi-font-family:Times;	mso-bidi-font-family:"Times New Roman";}@page Section1	{size:8.5in 11.0in;	margin:1.0in 1.25in 1.0in 1.25in;	mso-header-margin:.5in;	mso-footer-margin:.5in;	mso-paper-source:0;}div.Section1	{page:Section1;}--&gt;&lt;/style&gt;&lt;div style="font-family: 
inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;Virtual Currency is currency, which is used inside games, websites, applications, etc. It can be bought and traded and in many cases has a conversion to real currency. For instance Facebook Credits, this is virtual currency and each credit is worth 10 cents.&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;A common problem I run into when building Currency Sinks or earns is; what part of the application gave out the currency and why? Some techniques I used to manage this problem are to create a constants file of a bunch of action ids and used the constants wherever the application writes updates to the balance.&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;This causes a problem. Developers will just reuse the same id for very distinct reasons. When debugging situations I have to trace through a bunch of code. 
&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;Additionally problems can arise on deploy, because the use of currency change may be pushed out yet the constants file is not. This causes data issues and or even preventable exceptions disrupting the customer experience.&amp;nbsp; The latter possibility is much more offensive to me personally than data corruption. I go above and beyond to prevent any negative views a customer may have. My work shouldn't piss anyone off it should enable them.&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;I've tackled the problem by manually managing where the currency was deducted from, but this is the last time. 
If you do the same thing more than once and it can easily be solved by automation, automate it to make your life easier -- my motto.&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;How am I tackling the problem now?&lt;/span&gt;&lt;/div&gt;&lt;span style="font-size: small;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="font-family: inherit; margin: 0.1pt 0in;"&gt;&lt;span style="font-size: small;"&gt;By default I am adding the calling Object's class and method to my CurrencyHistory Table with each Currency Transaction and top app name. Now on the row basis I can track what class and method changed the balance and where it was called. 
Belowis a table structure used to track currency and history of the currency balancechange.&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;pre class="brush:sql"&gt;--&lt;br /&gt;-- Holds the Balance of a users currency&lt;br /&gt;--&lt;br /&gt;DROP TABLE IF EXISTS Currency;&lt;br /&gt;CREATE TABLE `Currency` (&lt;br /&gt;  `userId` bigint unsigned NOT NULL,&lt;br /&gt;  `regular` int NOT NULL,&lt;br /&gt;  `premium` int NOT NULL,&lt;br /&gt;  PRIMARY KEY (`userId`)&lt;br /&gt;) ENGINE=InnoDB;&lt;br /&gt;&lt;br /&gt;--&lt;br /&gt;-- Holds the info on How the balance is generated&lt;br /&gt;--&lt;br /&gt;DROP TABLE IF EXISTS CurrencyHistory;&lt;br /&gt;CREATE TABLE CurrencyHistory (&lt;br /&gt;    transactionId BIGINT UNSIGNED NOT NULL COMMENT "Globally unique Id"&lt;br /&gt;    userId BIGINT UNSIGNED NOT NULL COMMENT "User Identifier",&lt;br /&gt;    itemId bigint unsigned NOT NULL DEFAULT 0 "Unique Identifier on what was bought or sold",&lt;br /&gt;    itemOwnerId bigint unsigned NOT NULL DEFAULT 0 "User that was the last owner",&lt;br /&gt;    regularAmount INT NOT NULL "How much was earned or sold",&lt;br /&gt;    premiumAmount INT NOT NULL "How much was earned or sold for premium currency",&lt;br /&gt;    createDate TIMESTAMP NOT NULL "When this event occurred",&lt;br /&gt;    callStackId DEFAULT NULL "What part of the code base contains the code that changed currency the lookup table is generated on the fly from the json string and stored offline"&lt;br /&gt;    PRIMARY KEY(userId, createDate, transactionId)&lt;br /&gt;) Engine=InnoDB;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now to modify Currency here is some PHP code:&lt;br /&gt;&lt;pre class="brush:php"&gt;&lt;br /&gt;  /**&lt;br /&gt;    * Update user's balance&lt;br /&gt;    * &lt;br /&gt;    * @param int $userId&lt;br /&gt;    * @param int $regularDelta&lt;br /&gt;    * @param int $premiumDelta&lt;br /&gt;    * @return bool&lt;br /&gt;    */&lt;br /&gt;    public function 
CurrencyDB_updateBalance($userId, $regularDelta = 0, $premiumDelta = 0,  $itemId = 0, $itemOwnerId = 0) {&lt;br /&gt;&lt;br /&gt;        $this-&amp;gt;validate-&amp;gt;id($userId);&lt;br /&gt;        $this-&amp;gt;validate-&amp;gt;int($regularDelta, $premiumDelta);&lt;br /&gt;        if( $regularDelta == 0 &amp;amp;&amp;amp; $premiumDelta == 0 ) {&lt;br /&gt;            return 0;&lt;br /&gt;        }&lt;br /&gt;&lt;br /&gt;        $this-&amp;gt;shard($userId)-&amp;gt;beginTransaction('updateBalance');&lt;br /&gt;        try {&lt;br /&gt;          &lt;br /&gt;            if( $regularDelta &amp;lt; 0 || $premiumDelta &amp;lt; 0 ) { &lt;br /&gt;                $row = $this-&amp;gt;shard($userId)-&amp;gt;selectRow("SELECT * FROM Currency WHERE userId=? FOR UPDATE", array($userId));&lt;br /&gt;                if( $row-&amp;gt;regular &amp;lt; -$regularDelta || $row-&amp;gt;premium &amp;lt; -$premiumDelta ) {&lt;br /&gt;                    throw new CurrencyDBException("Insufficient funds", CurrencyDBException::INSUFFICIENT_FUNDS);&lt;br /&gt;                }&lt;br /&gt;            }&lt;br /&gt;            &lt;br /&gt;            // modify the balance&lt;br /&gt;            $query = "INSERT INTO Currency(userId, regular, premium) VALUES(?, ?, ?) 
ON DUPLICATE KEY UPDATE regular=regular+VALUES(regular), premium=premium+VALUES(premium)";&lt;br /&gt;            $this-&amp;gt;shard($userId)-&amp;gt;update($query, array($userId, $regularDelta, $premiumDelta), $this-&amp;gt;cacheKey($userId));&lt;br /&gt;            &lt;br /&gt;            // get the GUID and record the details of the transaction&lt;br /&gt;            $transactionId = $this-&amp;gt;getTicket();&lt;br /&gt;            $query = "INSERT INTO CurrencyHistory(transactionId, userId, itemId, itemOwnerId, regularAmount, premiumAmount, createDate, fromWhere) VALUES(?, ?, ?, ?, ?, ?, NOW(), ?)";&lt;br /&gt;            $callStackId = $this-&amp;gt;getCallStackId(); // get the backtrace as an ID&lt;br /&gt;            $this-&amp;gt;shard($userId)-&amp;gt;update($query, array($transactionId, $userId, $itemId, $itemOwnerId, $regularDelta, $premiumDelta, $callStackId));&lt;br /&gt;            &lt;br /&gt;            $this-&amp;gt;shard($userId)-&amp;gt;commit('updateBalance');&lt;br /&gt;&lt;br /&gt;        } catch (Exception $e) {&lt;br /&gt;&lt;br /&gt;            $this-&amp;gt;shard($userId)-&amp;gt;rollback('updateBalance');&lt;br /&gt;            throw $e;&lt;br /&gt;&lt;br /&gt;        }&lt;br /&gt;        return 1;&lt;br /&gt;    }&lt;br /&gt;&lt;br /&gt; /**&lt;br /&gt;    * Return an Id for the code stack&lt;br /&gt;    * &lt;br /&gt;    * @return bigint&lt;br /&gt;    */&lt;br /&gt;   private function getCallStackId(){&lt;br /&gt;        if ($GLOBALS['cfg']['disable_feature_backtrace_sql'] != 1){&lt;br /&gt;        &lt;br /&gt;            $trace      = debug_backtrace();&lt;br /&gt;            $notdone    = 1;&lt;br /&gt;            $numLoops   = 0;&lt;br /&gt;            $maxLoops   = 10; // hope its not nested 10 levels deep&lt;br /&gt;        &lt;br /&gt;            while($numLoops++ &amp;lt; $maxLoops){&lt;br /&gt;            &lt;br /&gt;                $test = (strpos($trace[0]['class'], 'CurrencyDB') === FALSE ? 
0 : 1);&lt;br /&gt;&lt;br /&gt;                if (!$test){&lt;br /&gt;                    break;&lt;br /&gt;                }&lt;br /&gt;                array_shift($trace);&lt;br /&gt;            }&lt;br /&gt;        &lt;br /&gt;&lt;br /&gt;            $loc = array();&lt;br /&gt;            &lt;br /&gt;            &lt;br /&gt;            $loc[] = substr(str_replace("/var/www/html","",$_SERVER['PHP_SELF']), 1);&lt;br /&gt;            $loc[] = ($trace[0]['class'] ? $trace[0]['class'] : 'main');&lt;br /&gt;            $loc[] = $trace[0]['function'] ? $trace[0]['function'].'()' : 'called_from_script';&lt;br /&gt;            return $this-&amp;gt;getCodeLookupEntityId(json_encode($loc));&lt;br /&gt;                &lt;br /&gt;        }&lt;br /&gt;        &lt;br /&gt;        return 0;&lt;br /&gt; }       &lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;pre class="brush:sql"&gt;&lt;br /&gt;mysql&gt; select * from CodeLookup;&lt;br /&gt;+----------------------+------------------------------------------------------------------------+&lt;br /&gt;| entityId             | json                                                                   |&lt;br /&gt;+----------------------+------------------------------------------------------------------------+&lt;br /&gt;| 17113479085265723743 | ["test\/dathan\/test_CheckBonus.php","CheckBonus","CheckBonus_give()"] |&lt;br /&gt;+----------------------+------------------------------------------------------------------------+&lt;br /&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6642230009896161476?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6642230009896161476/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6642230009896161476' title='0 Comments'/><link 
rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6642230009896161476'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6642230009896161476'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/09/virtual-currency-and-tracking-what-is.html' title='Virtual Currency and Tracking What is Given Where'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5415302807696737915</id><published>2011-08-31T14:45:00.000-07:00</published><updated>2011-08-31T14:45:31.647-07:00</updated><title type='text'>Replacing sendmail and /var/spool/mail/mqueue with Gearman</title><content type='html'>I am obsessed with making things faster without spending an extraordinary amount of time reconstructing the wheel. I ran into an issue with sending mail-that I couldn't work around with a pure sendmail configuration. I needed to write the body of mail and have it queue, then sendmail send it. From the way sendmail works I couldn't force mail to queue then send without doing ugly hacks like blocking outbound port 25 connections. I tried all the common tweaks to make sendmail send faster, such as&lt;br /&gt;&lt;br /&gt;&amp;nbsp;&lt;a href="http://lildude.co.uk/howto-prevent-sendmail-from-using-dns"&gt;disabling sendmail from using dns&lt;/a&gt;&lt;br /&gt;&amp;nbsp;Using options such as FEATURE(`accept_unresolvable_domains') and FEATURE(`nocanonify')&lt;br /&gt;&lt;br /&gt;but none worked. In fact I was generating so much traffic that sendmail would refuse to accept anymore commands. Thus I lost emails. 
Let me describe some background of the setup and the growth rate to explain the problem in more detail. Sendmail is configured to dump all mail to sendgrid, a Cloud provider that handles your email relationship for you. Thus there is no need to reverse dns or do any checks since Sendgrid does it for me. All I need is to dump the mail as quickly as possible to SendGrid.&lt;br /&gt;&lt;br /&gt;I found that sendmail is not fast enough to dump mail to SendGrid. I could have used other transfer agents but I don't want to configure more things. The environment is very simple. PHP talks to localhost port 25 to dump the generated email to sendmail. Sendmail will try to connect to sendgrid, if so, block the client (Apache) until the mail is sent. The average send time is 1 second, thus the apache server blocks for 1 second per email sent. Blocking is bad especially with this case, since a social application could contact 1000s of users from a logged-in user's action. Let's imagine that one user generated event can contact 5000 users. If each email takes 1 second that takes 5000 seconds for that process to finish. Now that the problem is defined, let's find a good solution which can be handed off.&lt;br /&gt;&lt;br /&gt;I picked Gearman. I now build the email and write to a gearman queue. Around 30 workers are listening to the queue and will dump the mail directly to smtp.sendgrid.net via the PHP module Swift. What was gained in doing this?&lt;br /&gt;&lt;ul&gt;&lt;li&gt;Contacting 5000 email recipients takes 200ms since I am writing to memory&lt;/li&gt;&lt;li&gt;Apache does not block for too long (220ms).&lt;/li&gt;&lt;li&gt;I do not use localdisk since sendmail is bypassed. 
/var/spool/mail/mqueue is not used&lt;/li&gt;&lt;li&gt;I save on diskio and cpu cycles&lt;/li&gt;&lt;li&gt;I can now send at a higher rate with hardly any errors.&lt;/li&gt;&lt;li&gt;I can scale with more workers&lt;/li&gt;&lt;/ul&gt;Yet with all these gains I am exposed to losing email at this moment in time. If sendgrid goes down and the queue builds up, I can run out of memory. There are ways to work around this such as setting up persistent storage with Gearman, so this exposure is mitigable.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-TxWNKCHUL-k/Tl6n58U7x2I/AAAAAAAAAEI/3-RXx0GGDG0/s1600/gear_total_queue.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="76" src="http://3.bp.blogspot.com/-TxWNKCHUL-k/Tl6n58U7x2I/AAAAAAAAAEI/3-RXx0GGDG0/s200/gear_total_queue.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;This image shows how large my gearman queue gets. Its largest size is 400 at any given time, for 30 workers. 
As a result I am sending 300K emails a day and the email event is no more then 1 min old from when it was created.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5415302807696737915?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5415302807696737915/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5415302807696737915' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5415302807696737915'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5415302807696737915'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/08/replacing-sendmail-and.html' title='Replacing sendmail and /var/spool/mail/mqueue with Gearman'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-TxWNKCHUL-k/Tl6n58U7x2I/AAAAAAAAAEI/3-RXx0GGDG0/s72-c/gear_total_queue.png' height='72' width='72'/><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3933847645017779621</id><published>2011-07-29T14:23:00.000-07:00</published><updated>2011-07-29T14:23:32.875-07:00</updated><title type='text'>Automated Innodb Hot Backup to S3 from ec2 with a simple bash script, innobackup and jets3t</title><content type='html'>Backing up data is always necessary, especially for 
Disaster Recovery and Business Continuity Planning (BCP). One rule of thumb for me is let the computer do the work by automating repetitive tasks. Running things by hand over and over sucks, so if I do something more than once I typically automate the process. Backing up INNODB data is a good example of a solution that requires automation.&lt;br /&gt;&lt;br /&gt;I always start off with what are the requirements and limitations when deciding to automate something. My requirement was simple. Every night at 2am backup the database and upload it to s3 without downtime. The limitation is to avoid downtime, I must keep in mind that the database cannot go down and the tables cannot be locked. Thus I use XtraBackup provided by Percona.&lt;br /&gt;&lt;br /&gt;The second part of the requirement is to upload the data to s3 automatically. This is not hard, but there is a limitation. Uploading files &gt; 5GB is not possible, and typically anything over 200MB has an increased error rate on average. Good news is the guys that make JetS3t, a java based uploader tool, solved this for me.&lt;br /&gt;&lt;br /&gt;JetS3t works on NIX and Windows, there is a shell and bat script for your OS flavor. It supports huge file uploads via the amazon multipart upload api, and can upload data in parallel.&lt;br /&gt;&lt;br /&gt;Ideally I would stream a backup directly to S3 all in one step, yet I don't have the time to code that. Maybe in the future. So the next best thing is to do it in roughly two steps. 
Below is a BASH script that backups the mysql database, uploads the backup to S3.&lt;br /&gt;&lt;br /&gt;&lt;pre class="brush: bash;"&gt;#!/bin/bash&lt;br /&gt;&lt;br /&gt;INNOBACKUP="/usr/bin/innobackupex"&lt;br /&gt;INNOBACKUP_OPTIONS="--parallel=4 --user=backup --password=****"&lt;br /&gt;BACKUPDIR="/sqldata/backups"&lt;br /&gt;&lt;br /&gt;S3BUCKET="dbprodbackups"&lt;br /&gt;JETS3="/usr/local/jets3t/bin/synchronize.sh UP $S3BUCKET $BACKUPDIR"&lt;br /&gt;&lt;br /&gt;echo "Removing old local backups"&lt;br /&gt;cd $BACKUPDIR&lt;br /&gt;find . -type d -name "." -prune -o -type d -atime +3 -exec rm -rf {} \; -print&lt;br /&gt;&lt;br /&gt;echo "Starting INNODBACKUP:"&lt;br /&gt;&lt;br /&gt;echo "$INNOBACKUP $INNOBACKUP_OPTIONS $BACKUPDIR" &lt;br /&gt;$INNOBACKUP $INNOBACKUP_OPTIONS $BACKUPDIR &lt;br /&gt;&lt;br /&gt;echo "Sleeping for a 1 min 10 seconds"&lt;br /&gt;sleep 70&lt;br /&gt;&lt;br /&gt;S3_DIR_UPLOAD=`find . -maxdepth 1 -type d -cmin +1 -print |grep ./ |cut -d. -f2 |cut -d/ -f2|xargs`&lt;br /&gt;&lt;br /&gt;echo "Dir to Synchronize are $S3_DIR_UPLOAD"&lt;br /&gt;&lt;br /&gt;for dir in $S3_DIR_UPLOAD; do &lt;br /&gt;   echo "Executing $JETS3/$dir"&lt;br /&gt;   $JETS3/$dir&lt;br /&gt;done&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Configuring JetS3t is simple, its a matter of modifying a properties file.&lt;br /&gt;&lt;br /&gt;The jets3t directory (my case /usr/local/jets3t/) a directory config contains all the configs. In synchronize.properties define your accesskey and secretkey for your s3 account.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Line 8 says, synchronize the current folder and push it to s3 bucket dbprodbackups. 
JetS3t handles region automatically (Amazon tools do not).&lt;br /&gt;&lt;br /&gt;Running the script you'll see something like this for the innodb backup&lt;br /&gt;&lt;br /&gt;[02] Copying ./ShardLookup/MainLookup.ibd &lt;br /&gt;     to /sqldata/backups/2011-07-29_20-13-38/ShardLookup/MainLookup.ibd&lt;br /&gt;&gt;&gt; log scanned up to (99395152227)&lt;br /&gt;&gt;&gt; log scanned up to (99395249621)&lt;br /&gt;&gt;&gt; log scanned up to (99395366299)&lt;br /&gt;&gt;&gt; log scanned up to (99395373016)&lt;br /&gt;&gt;&gt; log scanned up to (99395382850)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;and something like this for the jets3t upload&lt;br /&gt;&lt;br /&gt;...&lt;br /&gt;&lt;br /&gt;N 2011-07-29_20-13-38/xtrabackup_binary&lt;br /&gt;N 2011-07-29_20-13-38/xtrabackup_binlog_info&lt;br /&gt;N 2011-07-29_20-13-38/xtrabackup_checkpoints&lt;br /&gt;N 2011-07-29_20-13-38/xtrabackup_logfile&lt;br /&gt;  Large upload parts: 32/43 - 76% of 38.88 GB (15.89 MB/s - ETA: 8 1/2 minutes) &lt;br /&gt;&lt;br /&gt;In about 1 hour I am able to backup 40GB and upload it to s3.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3933847645017779621?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3933847645017779621/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3933847645017779621' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3933847645017779621'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3933847645017779621'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/07/automated-innodb-hot-backup-to-s3-from.html' title='Automated Innodb Hot Backup to S3 from 
ec2 with a simple bash script, innobackup and jets3t'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8289997940831282958</id><published>2011-07-19T21:49:00.000-07:00</published><updated>2011-07-19T21:49:38.590-07:00</updated><title type='text'>What a web start-up engineering team should set-up pre-launch.</title><content type='html'>When asked this question a flurry of thoughts came to me and I stuttered to get them out. Here is my list and the reasons why. Note the basics of my reasoning comes from the desire not to wake up at night to fix a bug.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Agree to a base language:&lt;br /&gt;&lt;/b&gt;&lt;br /&gt;&lt;blockquote&gt;Syntactic sugar should be agreed upon. Building things in different languages at the start without the teams acknowledgement is just building a walled garden.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Agree to use the same code format:&lt;/b&gt;&lt;br /&gt;&lt;blockquote&gt;The team needs to agree to make the code look the same. Make a decision on tabs over spaces, DOS or UNIX format, class / directory layout, MVC or procedural etc.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Agree to a source code repo:&lt;/b&gt;&lt;br /&gt;&lt;blockquote&gt;Revisions are good, use source control&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Agree to a bug tracking system:&lt;/b&gt;&lt;br /&gt;&lt;blockquote&gt;Don't just use email or spreadsheets use a free open source bug database and make an interface for users to add bugs. 
If a user is frustrated enough to write a bug, the least an engineer can do is take the time to read their problems.&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Agree on what to launch with as your base architecture.&lt;/b&gt; (Doesn't mean it can't change)&lt;br /&gt;&lt;blockquote&gt;How is the data stored? How is it accessed? What is the payload size? Where is the data coming from? What is the latency? How can I monitor it? When does it break?&lt;br /&gt;These are some sample questions that each team member can answer (at least a majority of them).&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Build tools to push code via one or two clicks.&lt;br /&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;Build general tools about your users, your data, management for uploading to CDN, packing JS etc.&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Build a stat system to record exceptions, page load times, stats from apache, stats from the database or system in general and if you are uber make it the same stat system used for Business Intelligence.&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;The same system that tells me how many unique returning users also reports what the mysql system resources are.&lt;br /&gt;&lt;br /&gt;Stats are crucial for any organization, know when and why (realtime) when a user experiences a problem as well as when a new user joins.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;b&gt;Have fun, learn and be productive for your users sake :) &lt;/b&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;What else would you add?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8289997940831282958?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8289997940831282958/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8289997940831282958' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8289997940831282958'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8289997940831282958'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/07/what-web-start-up-engineering-team.html' title='What a web start-up engineering team should set-up pre-launch.'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5043305943337186674</id><published>2011-06-15T16:22:00.000-07:00</published><updated>2011-06-15T16:22:40.218-07:00</updated><title type='text'>AMQP PHP for RabbitMQ to talk to Node.js</title><content type='html'>There are really no examples on how to use &lt;a href="http://www.php.net/manual/en/book.amqp.php"&gt;PHP's AMQP PECL&lt;/a&gt; module, yet the documentation at php.net is still great and easy to understand. 
So, after reading the documentation in 5 mins I was able to put together a PHP Producer and reuse my Node.js Consumer.&lt;br /&gt;&lt;br /&gt;Here is an example starting with PHP&lt;br /&gt;&lt;pre class="brush: php;"&gt;&lt;br /&gt;//&lt;br /&gt;// connect&lt;br /&gt;//&lt;br /&gt;$cnn = new AMQPConnection();&lt;br /&gt;$cnn-&gt;connect();&lt;br /&gt;&lt;br /&gt;//&lt;br /&gt;// exchanges talk to queues set a fanout up&lt;br /&gt;// &lt;br /&gt;$ex = new AMQPExchange($cnn);&lt;br /&gt;$ex-&gt;declare('notifExchange2', AMQP_EX_TYPE_FANOUT, AMQP_AUTODELETE);&lt;br /&gt;&lt;br /&gt;//&lt;br /&gt;// add stuff to a queue for me to look at later (not needed)&lt;br /&gt;//&lt;br /&gt;$queue = new AMQPQueue( $cnn );&lt;br /&gt;$queue-&gt;declare('myqueue');&lt;br /&gt;$queue-&gt;bind('notifExchange2', 'routingkey');&lt;br /&gt;&lt;br /&gt;//&lt;br /&gt;// pure strings with no format are boring let's use json&lt;br /&gt;//&lt;br /&gt;$data =  array('message' =&gt; "helloworld");&lt;br /&gt;$str = json_encode($data);&lt;br /&gt;&lt;br /&gt;$ex-&gt;publish($str, 'routingKeyWhichIgnoredBecauseOfFanout', 0, array('Content-type' =&gt; 'text/json'));&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Now for Node.js&lt;br /&gt;&lt;br /&gt;&lt;pre class="brush: js;"&gt;#!/bin/env node&lt;br /&gt;&lt;br /&gt;require("./amqp.connection");&lt;br /&gt;require("./utilities");&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;connection.addListener('ready', function () {&lt;br /&gt;   &lt;br /&gt;    var e = connection.exchange('notifExchange2', {type: 'fanout'});&lt;br /&gt;    var q = connection.queue('100224', {autoDelete: false});&lt;br /&gt;&lt;br /&gt;    q.on('queueDeclareOk', function (args) {&lt;br /&gt;        puts('queue opened: Message Count:'+ args.messageCount + " ConsumerCount: " + args.consumerCount);&lt;br /&gt;        q.bind(e, "#");&lt;br /&gt;        q.subscribe(function(message) {&lt;br /&gt;            puts("Got a message: " + var_dump(message));&lt;br /&gt;        });&lt;br /&gt;    });&lt;br 
/&gt;});&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Pretty straightforward. When the connection is ready, set up the exchange and declare the queue; once the queue is declared, bind it to the exchange and on q.subscribe var_dump the message out. Notice var_dump in JavaScript; that's defined in utilities.&lt;br /&gt;&lt;br /&gt;Easy-peasy.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5043305943337186674?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5043305943337186674/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5043305943337186674' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5043305943337186674'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5043305943337186674'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/06/amqp-php-for-rabbitmq-to-talk-to-nodejs.html' title='AMQP PHP for RabbitMQ to talk to Node.js'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3648431298477547182</id><published>2011-06-14T20:09:00.000-07:00</published><updated>2011-06-14T20:09:31.334-07:00</updated><title type='text'>Building a realtime Feed with RabbitMQ Node.js AMQP and mySQL</title><content type='html'>Being a backend programmer, I rarely code in JavaScript. 
Today I code a lot of cross browser JavaScript with the fantastic lib jquery and its various extensions. Front end programming is hard to get correct since IE sucks but that's another Blog post that is out of scope. I certainly have more of an appreciation for the people that focus in this area. I can finally say I am really well rounded in doing CSS, JavaScript, Server tuning, DBA, PHP, C etc since I work the entire stack. Now for the setup and disclaimer. This post is not meant to provide a How-To but to explain the approach. If you would like a How To let me know on what.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;With Node.js the gap between the discipline of Front-End programming and Backend Programming is narrowing. To code in Node you need to know JavaScript. Node is great at handing off data and pumping data to the requester in an event based model that is fantastically fast and small. I love node, especially since node has access to the browser cookie.&lt;br /&gt;&lt;br /&gt;My use of node is very simple.&lt;br /&gt;&lt;br /&gt;Browser makes a long polling jsonp connection (json with padding basically the server can call code loaded on your browser) to Node.js.&lt;br /&gt;&lt;br /&gt;Node.js will send back any data that is waiting for the client, or is sent to the client during the connection lifetime, which recycles every 50 seconds. Node source of data is from RabbitMQ.&lt;br /&gt;&lt;br /&gt;RabbitMQ an erlang server which talks the AMQP protocol (shared by many message queue servers) is a message queue that can be made into a distributed fault tolerant system and hands off the message payloads to Node.js which sends the message to the users who are online and subscribe to an exchange. 
The source of data for RabbitMQ comes from the database commit in Apache application space (PHP).&lt;br /&gt;&lt;br /&gt;MySQL is the persistent store that handles page reload or init requests, while the previous two components enable the real-time feed.&lt;br /&gt;&lt;br /&gt;Memcache holds a person's friend list.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Notice Flash is not used since I wanted a pure JavaScript version and to reduce the amount of download needed to service the request. Additionally, web sockets are not finalized yet and do not work across all browsers while long polling does; Socket.io is cool but requires Flash as well.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So the ascii diagram shows this flow:&lt;br /&gt;&lt;br /&gt;Browser --&gt; Apache+PHP --&gt; DB&lt;br /&gt;if DB commit ok&lt;br /&gt;Apache+PHP--&gt;RabbitMQ--&gt;Node.js--&gt;Friends of Browser&lt;br /&gt;&lt;br /&gt;Since Node is a gateway let's focus more on RabbitMQ which handles the logic. I'm using RabbitMQ in a pub-sub model. To understand Rabbit, knowing its vocabulary is important.&lt;br /&gt;&lt;br /&gt;Producer: Produces the message&lt;br /&gt;Exchange: The bridge between Producer and the Queue(s). 
Note the producer has no idea what the queues are in concept, all it needs to know is the exchange will take the message from the producer and send it to the correct queue(s).&lt;br /&gt;Queue: On disk queue or in memory (distributed) queue based on durability settings holds the message from Producers&lt;br /&gt;Consumer: Node.js it reads all events that Node subscribed to and hands it off to the connected user.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I wrote a benchmark in node to test the capability of this setup and its quite impressive: let's take a quick look:&lt;br /&gt;&lt;br /&gt;&lt;pre class="brush: js; toolbar: false; ruler: true;"&gt;require("./amqp.connection");&lt;br /&gt;&lt;br /&gt; connection.addListener('ready', function() {&lt;br /&gt;   puts("connected to " + connection.serverProperties.product);&lt;br /&gt;   var e = connection.exchange('[userid]_feedExchange', {type: 'fanout'});&lt;br /&gt;   e.on('exchangeDeclareOk', function(data){&lt;br /&gt;       e.publish('routingKeyWhichIgnoredBecauseOfFanout', {message: "This is the message payload"});&lt;br /&gt;&lt;br /&gt;       setTimeout(function () {&lt;br /&gt;          // wait one second to receive the message, then quit&lt;br /&gt;          connection.end();&lt;br /&gt;       }, 1000);&lt;br /&gt;});&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The require (line 1) sets up the connection: global.connection = amqp.createConnection(options);&lt;br /&gt;Line 3 sets up a listener which fires off code when the connection is established.&lt;br /&gt;Line 5 declares the exchange. If it doesn't exist create it, else keep going. 
Notice that the exchange is dynamically created from the userId, so queue binds can happen against this exchange&lt;br /&gt;Line 6 listens for another event exchangeDeclareOK, if OK send the message which is on line 7&lt;br /&gt;Line 9 sets a timeout to cleanly close the connection else you will get &lt;br /&gt;&lt;br /&gt;&lt;pre class="brush: js; toolbar: false; ruler: true;"&gt;&lt;br /&gt;net.js:392&lt;br /&gt;    throw new Error('Socket is not writable');&lt;br /&gt;          ^&lt;br /&gt;Error: Socket is not writable&lt;br /&gt;    at Connection._writeOut (net.js:392:11)&lt;br /&gt;    at Connection.write (net.js:378:17)&lt;br /&gt;    at Connection._sendMethod (/usr/local/node/lib/node_modules/amqp/amqp.js:1011:8)&lt;br /&gt;    at Object.cb (/usr/local/node/lib/node_modules/amqp/amqp.js:1799:21)&lt;br /&gt;    at Exchange._tasksFlush (/usr/local/node/lib/node_modules/amqp/amqp.js:1306:10)&lt;br /&gt;    at Exchange._onMethod (/usr/local/node/lib/node_modules/amqp/amqp.js:1772:8)&lt;br /&gt;    at Exchange._onChannelMethod (/usr/local/node/lib/node_modules/amqp/amqp.js:1338:14)&lt;br /&gt;    at Connection._onMethod (/usr/local/node/lib/node_modules/amqp/amqp.js:900:28)&lt;br /&gt;    at AMQPParser.onMethod (/usr/local/node/lib/node_modules/amqp/amqp.js:807:12)&lt;br /&gt;    at AMQPParser._parseMethodFrame (/usr/local/node/lib/node_modules/amqp/amqp.js:454:10)&lt;br /&gt;&lt;br /&gt;shell returned 1&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Since this is a fanout approach, RabbitMQ is sending the message to each "User" that subscribed to the exchange.&lt;br /&gt;&lt;br /&gt;Here is an example of a consumer in node:&lt;br /&gt;&lt;pre class="brush: js; toolbar: false; ruler: true;"&gt;&lt;br /&gt;#!/bin/env node&lt;br /&gt;&lt;br /&gt;require("./amqp.connection");&lt;br /&gt;require("./utilities");&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;connection.addListener('ready', function () {&lt;br /&gt;        puts("connected to " + 
connection.serverProperties.product);&lt;br /&gt;        var e = {};&lt;br /&gt;        var q = {};&lt;br /&gt;        var qNames = {};&lt;br /&gt;        e = connection.exchange('[userid]_feedExchange', {type: 'fanout'});&lt;br /&gt;        for(var i = 0; i &lt; 1000; i++){&lt;br /&gt;            var queueName = "_" + 100223 + i;&lt;br /&gt;            q[queueName] = connection.queue(queueName, {autoDelete: true});&lt;br /&gt;            var f = function(){&lt;br /&gt;                var k=i; //variables are scoped by function and not brace so to get a copy of i we need to send the k at the time k is declared for the local lambda&lt;br /&gt;                q[queueName].on('queueDeclareOk', function (args) {&lt;br /&gt;                    q[args.queue].bind(e, "*");&lt;br /&gt;                    puts(k+': queue opened: ' + args.queue + ' Message Count:'+ args.messageCount + " ConsumerCount: " + args.consumerCount);&lt;br /&gt;                    q[args.queue].subscribe(function(json) {&lt;br /&gt;                        console.log(k + ": " + json.message);&lt;br /&gt;                    });&lt;br /&gt;                });&lt;br /&gt;            }();&lt;br /&gt;        }&lt;br /&gt;});&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;This consumer binds 1000 queues starting from 100223 to the [userid]_feedExchange in the producer section&lt;br /&gt;&lt;br /&gt;On line 15 I create a hashmap of queues to access later when line 18 event queueDeclareOK is thrown.&lt;br /&gt;Line 19 binds each queue to the exchange&lt;br /&gt;Line 21 subscribes each queue to any message sent from the exchange (if we used routing we can listen to certain types of messages - another post).&lt;br /&gt;&lt;br /&gt;Very simple, it is fast and works. 
On a single processor to send a message to 1000 consumers takes&lt;br /&gt;&lt;br /&gt;real 0m0.301s&lt;br /&gt;user 0m0.220s&lt;br /&gt;sys 0m0.028s&lt;br /&gt;&lt;br /&gt;using very little memory&lt;br /&gt;&lt;br /&gt;==16049== &lt;br /&gt;==16049== HEAP SUMMARY:&lt;br /&gt;==16049==     in use at exit: 1,275,122 bytes in 140 blocks&lt;br /&gt;==16049==   total heap usage: 3,798 allocs, 3,658 frees, 11,036,111 bytes allocated&lt;br /&gt;==16049== &lt;br /&gt;==16049== Searching for pointers to 140 not-freed blocks&lt;br /&gt;==16049== Checked 505,492 bytes&lt;br /&gt;==16049== &lt;br /&gt;==16049== LEAK SUMMARY:&lt;br /&gt;==16049==    definitely lost: 1,600 bytes in 74 blocks&lt;br /&gt;==16049==    indirectly lost: 1,048,736 bytes in 31 blocks&lt;br /&gt;==16049==      possibly lost: 172,080 bytes in 3 blocks&lt;br /&gt;==16049==    still reachable: 52,706 bytes in 32 blocks&lt;br /&gt;==16049==         suppressed: 0 bytes in 0 blocks&lt;br /&gt;==16049== Rerun with --leak-check=full to see details of leaked memory&lt;br /&gt;==16049== &lt;br /&gt;==16049== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 33 from 8)&lt;br /&gt;--16049-- &lt;br /&gt;--16049-- used_suppression:     33 dl-hack3-cond-1&lt;br /&gt;==16049== &lt;br /&gt;==16049== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 33 from 8)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3648431298477547182?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3648431298477547182/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3648431298477547182' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3648431298477547182'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3648431298477547182'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/06/building-realtime-feed-with-rabbitmq.html' title='Building a realtime Feed with RabbitMQ Node.js AMQP and mySQL'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8084347949595595083</id><published>2011-05-16T19:01:00.001-07:00</published><updated>2011-05-16T19:03:59.019-07:00</updated><title type='text'>Installing GearmanD on Amazon's EC2 Linux AMI Small Instance</title><content type='html'>Linux AMI is very close to Centos/RedHat but its Amazon's own distro. Here are some quick steps on installing Gearmand on your system. 
I am currently using it to distribute jobs across many instances, to run asynchronously or synchronously where the Apache CPU isn't blocked on long running procs, like fetching data from a website realtime, massaging the data and returning it to the browser or resizing images.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# put the stuff in /var/tmp&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;cd /var/tmp;&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# get the source&lt;br /&gt;#&lt;br /&gt;wget http://launchpad.net/gearmand/trunk/0.20/+download/gearmand-0.20.tar.gz&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# setup libs required for the source to compile&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;yum install -y libevent-devel.i386&lt;br /&gt;yum install -y gcc-c++.i386&lt;br /&gt;yum install -y boost-devel.i386 // C++ libs&lt;br /&gt;yum install -y libuuid-devel.i686 -- do not install the i386 version, it puts uuid.h inside /usr/include not /usr/include/uuid/&lt;br /&gt;yum install -y memcached-devel.i686&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# extract the source&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;tar xvzf gearmand-0.20.tar.gz&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# configure / make / make install&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;cd gearmand-0.20&lt;br /&gt;./configure --prefix=/usr &lt;br /&gt;make &amp;&amp; make test&lt;br /&gt;make install&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# add the user and run it&lt;br /&gt;#&lt;br /&gt;adduser gearmand&lt;br /&gt;/usr/sbin/gearmand -u gearmand&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# need a client&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;Now install PECL GearMan&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# client is not stable, thus use beta&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;pecl install channel://pecl.php.net/gearman-0.7.0&lt;br /&gt;&lt;br /&gt;you should now see&lt;br /&gt;&lt;br /&gt;Build process completed successfully&lt;br /&gt;Installing '/usr/lib/php/modules/gearman.so'&lt;br /&gt;install ok: channel://pecl.php.net/gearman-0.7.0&lt;br 
/&gt;configuration option "php_ini" is not set to php.ini location&lt;br /&gt;You should add "extension=gearman.so" to php.ini&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;After following the directions of extension=gearman.so&lt;br /&gt;&lt;br /&gt;php --info |grep gear&lt;br /&gt;gearman&lt;br /&gt;gearman support =&gt; enabled&lt;br /&gt;libgearman version =&gt; 0.20&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;now restart apache&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Tada now for some client code:&lt;br /&gt;&lt;br /&gt;One thing that I hate having to do is restart services when pushing code. I just want it to work. So using a Wrapper/Bridge design pattern in conjunction with restartd (using supervised python kit is just not possible) my new code is ready as soon as it makes it to disk.&lt;br /&gt;&lt;br /&gt;Here is how I did it:&lt;br /&gt;&lt;br /&gt;Three classes&lt;br /&gt;&lt;br /&gt;GearmanJobSubmitter.php&lt;br /&gt;&lt;br /&gt;GearmanJobPerformer.php&lt;br /&gt;&lt;br /&gt;GearmanJobGeneric.php&lt;br /&gt;&lt;br /&gt;The performer delegates the job to GearmanJobGeneric and  has a method called JobsWrapper(GearMan $job)&lt;br /&gt;&lt;br /&gt;JobsWrapper by looking at the workload is able to determine which Job to call.  
If the mtime of the file that contains the meat of the Job has changed, JobsWrapper will throw an exception and exit; otherwise it executes the job.&lt;br /&gt;&lt;br /&gt;If the wrapper killed itself, restartd then sees that the worker is not running and starts it back up.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Currently I have GearmanD managing file uploads, stat logging, data collecting etc.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8084347949595595083?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8084347949595595083/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8084347949595595083' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8084347949595595083'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8084347949595595083'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/05/installing-gearmand-on-amazons-ec2.html' title='Installing GearmanD on Amazon&apos;s EC2 Linux AMI Small Instance'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2425391232223898754</id><published>2011-04-20T16:41:00.000-07:00</published><updated>2011-05-26T13:07:41.598-07:00</updated><title type='text'>Building an email system on EC2 from top to 
bottom with SendGrid</title><content type='html'>Email is a beast. Sending email is easy but getting it past spam filters when you’re a legitimate service is rather hard. Sending good email is especially hard when on EC2. The reason is due to spammers using and abusing elastic IPs. So, for startups your best bet of sending out a lot of email and getting it to the user is to use a service. I picked Sendgrid. It's cheap, fast, has good email tracking and builds all the appropriate email headers to get the mail white listed and into the destination's inbox. Sendgrid is the sender (think of them as an extension to sendmail). This is the easy part but to make a true email system that protects your users you need to take some things into consideration.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;All HTML mail needs a Text counterpart. Some people just like mutt or pine over HTML email. Thus when sending email send out mail in HTML format and Text format with mime headers so whatever email client is used can see a good formatted email.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;All HTML links should be encrypted and encoded when passing identifying information. This needs to be done to make sure that the link is clicked by the person it is intended for. For instance&lt;br /&gt;&lt;br /&gt;http://www.example.com/?enc=Ujcrq3uW8oU%2BpkW8bPJirwfczkMBnaWMObHlzCK8taau9PAjEQhCIZToj302zjVRs2f61bt7dddT7v21kpbhw6ZR5B1%2BoBIZNAznoLceK7z%2B%2BBm%2FS7%2FHKx0zfYah2Du%2FdaxP9dGel67SyQBp9ZJurXomrkyqkeLJiPioKMCaoygHruI%2FcJ83DvmOBNhqOjNQLyVMIHdjEWx3yYTMTsSZRUDdNPdaBfuTD3InspKINsQBBON0fPe890l3%2Bpb6p%2F4GtA%3D%3D&amp;utm_source=sendgrid.com&amp;utm_medium=email&amp;utm_campaign=website&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now I can track retention and since the enc value is encrypted using AES-256 people are not going to break this encoding without the Private key. Personally I am using this data for two purposes. 
The primary purpose is to ensure that the click comes from the intended person; the next purpose is to pass data around for what the app needs to fetch.&lt;br /&gt;&lt;br /&gt;An example. XYZ commented on your status update. Click here to see the comment. When the person clicks I need to pull that specific activity to generate the message. Thus the link allows for that with no storage overhead. Here is some example code&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;public function encrypt( $data, $forUserId='' ){&lt;br /&gt;        # &lt;br /&gt;        # open cipher module (do not change cipher/mode)&lt;br /&gt;        #&lt;br /&gt;        $this-&gt;openCipherModule();&lt;br /&gt;        $this-&gt;createIV();&lt;br /&gt;        $this-&gt;setUserKey($forUserId);&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;        $msg = json_encode($data);&lt;br /&gt;&lt;br /&gt;        $this-&gt;init();&lt;br /&gt;&lt;br /&gt;        $encoded = $this-&gt;doEncryption($data);&lt;br /&gt;&lt;br /&gt;        $this-&gt;closeEncryption();&lt;br /&gt;&lt;br /&gt;        return $encoded;&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now that I have sending down, links down, we need to put it all together. 
I am using sendmail as my mail transfer agent (MTA) and here is what is needed on EC2 to get it to work.&lt;br /&gt;&lt;br /&gt;&lt;ol&gt;&lt;br /&gt;&lt;li&gt; yum install sendmail &lt;/li&gt;&lt;br /&gt;&lt;li&gt; yum install sendmail-cf &lt;/li&gt;&lt;br /&gt;&lt;li&gt; vim /etc/mail/sendmail.mc and add define(`SMART_HOST', `smtp.sendgrid.net')dnl  *says send all localmail to sendgrid*&lt;/li&gt;&lt;br /&gt;&lt;li&gt; vim /etc/mail/access and add AuthInfo:smtp.sendgrid.net  "U:sendgrid username"   "P:sendgrid pass for your account"  "M:PLAIN" *when sending mail through sendgrid use your sendgrid account info*&lt;/li&gt;&lt;br /&gt;&lt;li&gt;m4 /path/to/m4.cf /etc/mail/sendmail.mc &gt; /etc/mail/sendmail.cf *compile the changes*&lt;/li&gt;&lt;br /&gt;&lt;li&gt;makemap hash /etc/mail/access.db &lt; /etc/mail/access *encode the pass*&lt;/li&gt;&lt;br /&gt;&lt;li&gt;/etc/init.d/sendmail restart&lt;/li&gt;&lt;br /&gt;&lt;/ol&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I choose to send mail locally to queue in case sendgrid goes down, which happens often; this is why I don't make a socket connection to their servers realtime. 
&lt;br /&gt;&lt;br /&gt;Next we need to configure PHP's SWIFT class to sendmail locally&lt;br /&gt;&lt;code&gt;&lt;br /&gt;        $transport = Swift_SmtpTransport::newInstance('localhost', 25);&lt;br /&gt;        $this-&gt;swift = Swift_Mailer::newInstance($transport);&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now the only thing left to do is building a table to record all the clicks that people do to unsubscribe from getting email&lt;br /&gt;&lt;br /&gt;&lt;code&gt;&lt;br /&gt;&lt;br /&gt; CREATE TABLE `DoNotEmail` (&lt;br /&gt;  `userId` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'userId that is getting the email',&lt;br /&gt;  `emailAddr` varchar(255) NOT NULL COMMENT 'Denormalized email address',&lt;br /&gt;  `emailAddrHash` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'emailAddr in our numeric format',&lt;br /&gt;  `createdDate` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'when the email entered the system',&lt;br /&gt;  PRIMARY KEY (`emailAddrHash`,`userId`)&lt;br /&gt;) ENGINE=InnoDB DEFAULT CHARSET=utf8&lt;br /&gt;&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;Any time a person clicks unsubscribe a row is inserted into this table. Anytime email is ready to be built and sent a query is performed on this table by emailAddrHash which is 8 bytes instead of 50+ bytes for email. 
I like to keep my keys small.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Most of the time will be spent building your email templates and this is just an abbreviated list of steps things to consider to move the process faster.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2425391232223898754?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2425391232223898754/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2425391232223898754' title='9 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2425391232223898754'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2425391232223898754'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/04/building-email-system-on-ec2-from-top.html' title='Building an email system on EC2 from top to bottom with SendGrid'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-590443040599716141</id><published>2011-04-18T17:12:00.000-07:00</published><updated>2011-04-18T17:49:10.299-07:00</updated><title type='text'>Operations is the corner stone of a Web company</title><content type='html'>Operations defined as the guys/gals who deal with servers have many different names. 
They are called Dev OPS, Sysadmins, System Engineers, Capacity Planners, Server Monkey (if you are a jerk), DBA, the guy in the corner with an Anarchy Symbol on his Shirt etc.&lt;br /&gt;&lt;br /&gt;OPS is the front line defense and Cavalry. They know more about the system than the developer who installed software on OPS managed servers, because they see the crash happen in the wild. One word of advice, your organization is going to die if you piss off the guy with ROOT access. If you don't know what that is - you really really need to be nice to OPS.&lt;br /&gt;&lt;br /&gt;I have been at many layers of a company. I've been QA, Lab Tech, System Tuner, Server Installer, Web Developer, Software Engineer, DBA, Director of Engineering, Chief Architect, Business interface, yet I am just the dude that can get stuff done with the help of the people around me. &lt;br /&gt;&lt;br /&gt;In many places I have seen OPS treated as Second Class Citizens - this is where I step in. I pride myself as being a member of OPS. I pride myself as being a member of DEV, but what I really pride myself in is being the Dude that is the bridge between OPS and DEV. The guy that says hey dude this is your 5th Custom Server type, you're messing with OPS' puppet profile, can we standardize? The guy that will consolidate underutilized servers when OPS lets me know or when I catch it myself. I am the guy that does what OPS wants when OPS wants it done. The guy that looks at the same charts that OPS looks at to find what's broken. The guy that suggests in Engineering meetings which exclude OPS to include OPS. I am also the guy that suggests DEVs should be on call as well as OPS.&lt;br /&gt;&lt;br /&gt;Value OPS! They are on your team, and if they do not look busy that means they are really good, DEVs and OPS are gelling and your team is going to WIN! 
If your OPS team is not happy make them because do you know how to log into the power strip and bounce the rack?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-590443040599716141?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/590443040599716141/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=590443040599716141' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/590443040599716141'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/590443040599716141'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/04/operations-is-corner-stone-of-web.html' title='Operations is the corner stone of a Web company'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5092269147599798232</id><published>2011-04-01T09:44:00.000-07:00</published><updated>2011-04-01T12:45:16.338-07:00</updated><title type='text'>Building a Facebook Feed Like system on a Sharded mySQL System</title><content type='html'>Building a Feed can be broken down into a few key questions.&lt;br /&gt;&lt;br /&gt;Who can see what?&lt;br /&gt;How many people can see it?&lt;br /&gt;&lt;br /&gt;These key answers really dictates the design of data structure from a global read method to a global write method. 
A global read method can be summarized using some sql.&lt;br /&gt;&lt;br /&gt;SELECT * FROM Activity WHERE userId IN (?,?,?,?,?) AND createDate &gt; NOW() - INTERVAL 2 WEEK;&lt;br /&gt;&lt;br /&gt;The above is actually not an optimal SELECT; what is really done in my case is foreach '?' do a parallel query to each shard group of&lt;br /&gt;&lt;br /&gt;SELECT * FROM Activity WHERE userId = ? AND createDate &gt; NOW() - INTERVAL 2 WEEK;&lt;br /&gt;&lt;br /&gt;The reason why the 1st query is not optimal is due to the fact that there are two ranges in the 1st query. The IN clause is a range and createDate is a range, thus you can't use a composite key (userId, createDate); the query is only using userId.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;A global write method can be summarized with the following query:&lt;br /&gt;&lt;br /&gt;SELECT * FROM Activity WHERE rowOwnerId = ? AND createDate &gt; NOW() - INTERVAL 2 WEEK;&lt;br /&gt;&lt;br /&gt;Now you may be wondering if you are doing a global write, why is there a select? For each friend that is following the person who has activity done to said person which either they initiated or is acted upon, write a row for said person and their friends. Thus one action can create 5000+ writes such that the number of writes is proportional to the number of friends that are following the person being acted upon. Writes are N+1.&lt;br /&gt;&lt;br /&gt;Global writes allow for your view of the Feed to be fast but it is really hard to keep it in sync in real-time; additionally it's very expensive since a copy of the pointer can be copied N times and to get real speed and to avoid additional reads, the content is copied for each friend write. In global writes you have to create queues which succumb to &lt;a href="http://en.wikipedia.org/wiki/Queueing_theory"&gt;queuing theory&lt;/a&gt;. In addition to these considerations, edits and deletes are very hard to keep in sync, as well as needing special consideration for new friends joining. 
These things can expose system lag constantly. That being said it can still be done, it's a lot of work that is expensive in terms of hardware cost and developer time.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The system I have built supports both yet currently implements a global read method. The mysql table structure follows.&lt;br /&gt;&lt;br /&gt;userId bigint unsigned NOT NULL DEFAULT 0 - this is the person being acted upon.&lt;br /&gt;parentOwnerId bigint unsigned NOT NULL DEFAULT 0 - this is the person who created the content&lt;br /&gt;parentId bigint unsigned NOT NULL DEFAULT 0 - this defines the pointer for the actual content&lt;br /&gt;parentType smallint unsigned NOT NULL DEFAULT 0 - this defines that the content is a wall post or a friend event or &lt;br /&gt;&lt;br /&gt;itemType smallint unsigned NOT NULL DEFAULT 0 - this defines the actual action, which could be a comment to the parent Type or just the parent itself&lt;br /&gt;itemId bigint unsigned NOT NULL DEFAULT 0 - this defines the pointer to the item for content retrieval&lt;br /&gt;createDate timestamp not NULL DEFAULT 0 - this indicates when the event entered the system&lt;br /&gt;modifiedDate timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP - this defines when the item was added to for a later feature.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The primary key in this mix is &lt;br /&gt;userId, itemType, parentId&lt;br /&gt;&lt;br /&gt;this defines that the user acted upon will only have 1 row foreach type of a parentId. This means if there are 1000 comments to a status update, the activity table will have 1 row for that user that made the comment even if that same user was the person who made 1000 comments. 
Note that this activity table was updated 1000 times because modifiedDate is a timestamp that gets updated on every action.&lt;br /&gt;&lt;br /&gt;As you can tell the Activity Table is central to the entire system and the actual content tables are what I call branches to Activity. Let's take an example to explain this.&lt;br /&gt;&lt;br /&gt;user 1000 adds a status update, status update is a table called WallPosts.&lt;br /&gt;&lt;br /&gt;WallPosts.postId - 88888&lt;br /&gt;WallPosts.itemOwnerId - 1000&lt;br /&gt;WallPosts.createDate - NOW()&lt;br /&gt;WallPosts.posterId - 1000&lt;br /&gt;WallPosts.post - This is my status update.&lt;br /&gt;&lt;br /&gt;Activity.userId - 1000&lt;br /&gt;Activity.parentOwnerId - 1000&lt;br /&gt;Activity.parentId - 88888&lt;br /&gt;Activity.parentType - 1&lt;br /&gt;Activity.itemType - 1&lt;br /&gt;Activity.itemId - 88888&lt;br /&gt;Activity.createDate - NOW()&lt;br /&gt;Activity.modifiedDate - NOW()&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So now friends of 1000 will query userId 1000's shard for the WallPost. 
Now let's have userId 2000 leave a comment in a table called Comments&lt;br /&gt;&lt;br /&gt;Comments.commentId - 88889&lt;br /&gt;Comments.itemOwnerId -  1000&lt;br /&gt;Comments.itemType - 1&lt;br /&gt;Comments.commenterId - 2000&lt;br /&gt;Comments.createDate - NOW()&lt;br /&gt;Comments.comment - " I left a comment  on your status update!"&lt;br /&gt;&lt;br /&gt;This says that friend 2000 left a comment on userId's 1000 (their shard) of "I left a comment on your status update!"&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Activity.userId - 2000&lt;br /&gt;Activity.parentOwnerId - 1000&lt;br /&gt;Activity.parentType - 1&lt;br /&gt;Activity.parentId - 88888&lt;br /&gt;Activity.itemType - 2 //(left a comment)&lt;br /&gt;Activity.itemId - 88889&lt;br /&gt;Activity.createDate - NOW()&lt;br /&gt;Activity.modifiedDate - NOW()&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;This says that friends of userId -2000, 2000 left a comment on UserId 1000's status update where the comment and the status update both reside on 1000's shard. The content can be reached via&lt;br /&gt;&lt;br /&gt;88889 == CommentId&lt;br /&gt;&lt;br /&gt;and friends of 2000 now know that 2000 left a comment for 1000. Thus the viral effect within 1/2 degree's of userId 1000.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;There is a lot more going on behind the scenes like Children of parents, Using the combined read throughput of memcache, sharded mysql system and permission filtering but this is the general idea.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I am going to continually update this post, so keep checking back here as I add diagrams, flows, links and details looks at grouping events.&lt;br /&gt;&lt;br /&gt;Note: By no means am I saying that a global write or Fan-Out write is not as good as a Fan-Out on Load or global read. 
I'm taking an approach to make it cheap and not trying to optimize too soon.&lt;br /&gt;&lt;br /&gt;Here are some good links that I found after writing this post:&lt;br /&gt;&lt;a href="http://www.quora.com/What-are-the-scaling-issues-to-keep-in-mind-while-developing-a-social-network-feed"&gt;&lt;br /&gt;http://www.quora.com/What-are-the-scaling-issues-to-keep-in-mind-while-developing-a-social-network-feed&lt;/a&gt;&lt;br /&gt;&lt;a href="http://www.quora.com/What-are-best-practices-for-building-something-like-a-News-Feed?q=news+feed"&gt;&lt;br /&gt;http://www.quora.com/What-are-best-practices-for-building-something-like-a-News-Feed?q=news+feed&lt;br /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5092269147599798232?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5092269147599798232/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5092269147599798232' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5092269147599798232'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5092269147599798232'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/04/building-facebook-feed-like-system-on.html' title='Building a Facebook Feed Like system on a Sharded mySQL System'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6843079412992606165</id><published>2011-03-24T11:11:00.000-07:00</published><updated>2011-04-01T09:44:22.747-07:00</updated><title type='text'>Left RockYou to do new things</title><content type='html'>I decided to leave Rockyou because after three years of working on the Facebook platform, Reach Applications, Viral Loops, Games, Scaling, and Management. Being a Director of Engineering was cool yet management decayed my skills as a system builder-ideally in the future I can find a good balance between coding which I love and management of a process. Since I focused on management, it's like being a C++ programmer for years; this programmer does not code for a while and then is asked to build an optimal hash table within a few days. This programmer knows if he/she was in the thick of things as a Coder he/she could do it. Since he/she is rusty it will take he/she much longer. This suddenly became apparent as I was asked softball questions and I did not answer them quickly enough. Fortunately I am taxing my skills and as a result they are flooding back. My mojo is here. I am learning new things and one thing I can say is JQUERY is awesome.&lt;br /&gt;&lt;br /&gt;To my past co-workers, friends, Rockyou family I deeply honored of our time together. We built some amazing things. Even at one point in time we surpassed Facebook in Daily and Monthly Uniques! Rockyou has a world class OPS organization that was pivotal in keeping 99.999% uptime. My team, which was the bridge between operations and development one word I can use to describe them: AWESOME.&lt;br /&gt;&lt;br /&gt;Now for the next things I will focus on. 
I was going to be a speaker at Collaborate 2011 in Florida but I cannot speak now since I do not work for Rockyou as they were going to sponsor my trip to Collaborate; thus I will post what I was going to talk about:&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;br /&gt;&lt;li&gt; How &lt;span style="font-weight:bold;"&gt;one&lt;/span&gt; person can manage hundreds of mySQL servers doing &lt;span style="font-weight:bold;"&gt;400K TPS combined&lt;/span&gt;&lt;/li&gt;&lt;br /&gt;&lt;/ul&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Future posts:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;br /&gt;&lt;li&gt; Building a Fast Feed &lt;/li&gt;&lt;br /&gt;&lt;li&gt; Long Polling and controlling the effects on your backend systems &lt;/li&gt;&lt;br /&gt;&lt;/ul&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6843079412992606165?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6843079412992606165/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6843079412992606165' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6843079412992606165'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6843079412992606165'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2011/03/left-rockyou-to-do-new-things.html' title='Left RockYou to do new things'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6864454767808246462</id><published>2010-12-16T09:50:00.000-08:00</published><updated>2010-12-16T11:28:07.077-08:00</updated><title type='text'>HandlerSocket mySQL's NoSQL, PHP and Webscale</title><content type='html'>&lt;a href="http://golanzakai.blogspot.com/2010/10/installing-denas-handlersocket-nosql.html"&gt;HandlerSocket&lt;/a&gt; is a server plug-in that interfaces with InnoDB directly bypassing for the most part the core mySQL server all together. Using Handler socket you do not connect to the traditional port 3306 or use the mySQL protocol to communicate with the mySQL server, you use 9998 reads, 9999 writes and talks a different protocol (much smaller) to an epoll-based service. This awesome addition means that you can have many many connections with very little overhead. The core mySQL server does a good job, but can do better. I'm sure the reason why epoll is not in the core engine is probably due to licensing issues or some other reason I am not aware of, like it would be a big deal to add it.&lt;br /&gt;&lt;br /&gt;Why bypass the server? Really bypassing the sql parser is what was desired. The sql parser ads a significant amount of slowdown at a huge transaction rate, additionally mutex contention is involved prior to reaching the storage engine with malloc overhead (although Monty has fixed this in MariaDB). Even though you can easily get 45-70K selects a second through the mySQL SQL parser layer for InnoDB primary key lookups, this number falls short of Memcache's 600K Gets per second or various other NoSQL solutions that keep data in memory. &lt;br /&gt;&lt;br /&gt;HandlerSocket just like, Memcache, Cassandra, MongoDB accesses the data by a key, with the fastest access on Primary Key since that's how InnoDB structures it data, sorted by the primary key. 
I've seen benchmarks that show Handler socket doing 750K Transactions per second on a single server. I'm now in the process of benching it and let me say I think I can get a better number. This by the way blows away ALL OTHER NoSQL benchmarks with the added benefit of an ACID compliant DATABASE.  These numbers and durability really shows the power of InnoDB this is why I believe it's the best Storage engine in the world. (Plus the code is clean).&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Percona released a  &lt;a href="http://www.mysqlperformanceblog.com/2010/12/13/percona-server-5-1-52-12-3/"&gt;XTRADB server version&lt;/a&gt; with HandlerSocket a plug-in created by the &lt;a href="http://yoshinorimatsunobu.blogspot.com/2010/10/using-mysql-as-nosql-story-for.html"&gt;engineers of Dena&lt;/a&gt; and immediately I started testing it. &lt;br /&gt;&lt;br /&gt;Installation of HandlerSocket is detailed &lt;a href="http://golanzakai.blogspot.com/2010/10/installing-denas-handlersocket-nosql.html"&gt;here&lt;/a&gt; in case you want to have HandlerSocket on a vanilla mySQL source base. The latest version prior to the version above of XTRADB, I ran into a big stall and had to roll back to a vanilla install of mySQL to get rid of it. When I have some time I'll try to duplicate the stall in a controlled environment and use PMP to track down where the stall is. The versions that I dealt with are drastically different so it may just be an new InnoDB stall or something else.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The PHP Client that I am using is &lt;a href="http://code.google.com/p/php-handlersocket/"&gt;php-handlersocket &lt;/a&gt;which is a PECL type version (C driver with exposure to PHP). 
It does the job but needs some work that I'm doing now.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Here is some rough code and output of data.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;#&lt;br /&gt;# make a php connection&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;$hs = new HandlerSocket($host, $port);&lt;br /&gt;if (!($hs-&gt;openIndex(1, $dbname, $table, HandlerSocket::PRIMARY, 'facebook_id,shard_id,shard_lock'))){&lt;br /&gt;    die($hs-&gt;getError() . "\n");&lt;br /&gt;    &lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# execute a query on the primary key and return the columns from the 5th parameter on openIndex&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;$retval1 = $hs-&gt;executeSingle(1, '=', array($uid),1,0);&lt;br /&gt;init_funcs_log_info("HandlerSocket", "ExecuteSingle:" . var_export($retval1,1), 'socket_handler_query');&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# prepare a friend query&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;$socket_handler_commands = array();&lt;br /&gt;&lt;br /&gt;foreach($list_of_users as $userid){&lt;br /&gt;    $socket_handler_commands[] = array(1, '=', array($userid));&lt;br /&gt;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;#&lt;br /&gt;# execute the friend query&lt;br /&gt;#&lt;br /&gt;&lt;br /&gt;$retval2 = $hs-&gt;executeMulti($socket_handler_commands);&lt;br /&gt;&lt;br /&gt;init_funcs_log_info("HandlerSocket", "ExecuteMulti:count(" .count($list_of_users) . ")\n" . 
var_export($retval2,1), 'socket_handler_query');&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6864454767808246462?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6864454767808246462/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6864454767808246462' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6864454767808246462'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6864454767808246462'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/12/handlersocket-mysqls-nosql-php-and.html' title='HandlerSocket mySQL&apos;s NoSQL, PHP and Webscale'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3298070247948878449</id><published>2010-11-29T10:46:00.000-08:00</published><updated>2010-11-29T11:13:48.535-08:00</updated><title type='text'>Back from my European Vacation</title><content type='html'>I am finally a citizen of the world. I beat out the 85% statistic in which American's from the USA do not venture out of the USA. I flew into Paris spent 5 days, then went to Brussels (1 day), Amsterdam (2 days), flew to Barcelona had to stay there a day because Venice was on strike, then to Venice (2 days). 
From Venice to Rome (5 days), then to (day trip) Naples, Pompeii, back to Rome then back to Paris and finally home.&lt;br /&gt;&lt;br /&gt;I can go into detail of how awesome this trip was but I am going to focus on how recharged I am. Man am I! During my travel times I geeked out and read the MySQL 5.1 Plugin Development book by Sergei Golubchik and Andrew Hutchings, which is AWESOME!&lt;br /&gt;&lt;br /&gt;Now that I am back I hope to put the things I learned from Plugin Development into action and write posts about them.&lt;br /&gt;&lt;br /&gt;Some Posts in the next couple of weeks that I am going to focus on. SQL optimizations, Optimizer problems in 5.1, Stalls, performance performance performance.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3298070247948878449?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3298070247948878449/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3298070247948878449' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3298070247948878449'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3298070247948878449'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/11/back-from-my-european-vacation.html' title='Back from my European Vacation'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5765663684579627808</id><published>2010-11-03T00:31:00.000-07:00</published><updated>2010-11-03T11:16:45.515-07:00</updated><title type='text'>Facebook Live: Running MySQL at Scale Tech Talk by Facebook</title><content type='html'>Thank you to &lt;a href="http://mituzas.lt/"&gt;Domas&lt;/a&gt;, &lt;a href="http://mysqlha.blogspot.com/"&gt;Mark&lt;/a&gt;, the &lt;a href="http://www.facebook.com/#!/MySQLatFacebook"&gt;cool team of DB Dudes at Facebook&lt;/a&gt; and to Facebook for hosting a spectacular event of beer, wine and of course mySQL.&lt;br /&gt;&lt;br /&gt;Here are some notes that I'd like to share and here is the &lt;a href="http://www.livestream.com/facebookevents/share?clipId=flv_cc08bf93-7013-41e3-81c9-bfc906ef8442&amp;utm_source=lsplayer&amp;utm_medium=ui-share&amp;utm_campaign=facebookevents&amp;utm_content=facebookevents"&gt;full video recording of the event&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Before I start, let me say that I'm super impressed with the tools that the Facebook DB Teams put together. They are to say the least better then commercial grade. The tool set covers all the staples like graphing show global status, InnoDB stats, real-time trending to see what the norm is and how things flux in a professional designed interface with drill downs to the actual root cause of the end query messing things up.&lt;br /&gt;&lt;br /&gt;Facebook has three (main?) 
database teams.&lt;br /&gt;&lt;br /&gt;Operations who fix the problem right now.&lt;br /&gt;Performance who fix the problem today or tomorrow.&lt;br /&gt;Engineering who don't fix the problem fast enough *joke*&lt;br /&gt;&lt;br /&gt;Even though there are disciplines the database teams all gel.&lt;br /&gt;&lt;br /&gt;Now for some stats:&lt;br /&gt;&lt;br /&gt;Query response time R(t): 4ms reads, 5ms writes&lt;br /&gt;Network bytes per second: 38GB @ peak&lt;br /&gt;queries per second: 13M&lt;br /&gt;Rows read per second: 450M @peak&lt;br /&gt;Rows changed per second: 3.5M @peak&lt;br /&gt;InnoDB disk ops per second: 3.5M&lt;br /&gt;&lt;br /&gt;What does it mean? WOW this is webscale.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;For Facebook, network latency is killer. Cross country queries hurt, especially done serially. For instance&lt;br /&gt;&lt;br /&gt;start a transaction&lt;br /&gt;add a new row to one table&lt;br /&gt;increment another table&lt;br /&gt;end a transaction&lt;br /&gt;&lt;br /&gt;each command is a network trip, the network latency can take up to 100ms yet the sql command is less than 10ms. Why put in all that effort of tuned servers, queries, code to give a bad user experience of 100ms?&lt;br /&gt;&lt;br /&gt;Using a feature in the mySQL client API, they can send multiple statements in the time it took for the 1st query delta. This API flag for mysql_connect is CLIENT_MULTI_STATEMENTS.&lt;br /&gt;&lt;br /&gt;Why not just use triggers or stored procs? Because managing them is a nightmare, I've talked about this many times and I'm glad Facebook agrees - this is a great trick for reducing the cost of cross datacenter db calls producing dynamic database stored procs. I doubt that triggers or stored procs are fully gone.&lt;br /&gt;&lt;br /&gt;Then OSC, they talked about how much time this saved the company a few weeks ago and it's staggering. What used to take days or longer is reduced to hours. 
I'm so impressed by this, that I asked my team to make this into a web app that can execute these commands across an arbitrary set of servers. My newest team member Einav finished it and we now use it in production (screen shots and a follow up post coming).&lt;br /&gt;&lt;br /&gt;Facebook is surprisingly open and really is fostering / giving back to the community. Keep up the great work!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5765663684579627808?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5765663684579627808/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5765663684579627808' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5765663684579627808'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5765663684579627808'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/11/facebook-live-running-mysql-at-scale.html' title='Facebook Live: Running MySQL at Scale Tech Talk by Facebook'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5937565851086306627</id><published>2010-09-30T11:28:00.000-07:00</published><updated>2010-09-30T11:59:50.048-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NoSQL'/><category scheme='http://www.blogger.com/atom/ns#' 
term='mongodb'/><category scheme='http://www.blogger.com/atom/ns#' term='review'/><category scheme='http://www.blogger.com/atom/ns#' term='book'/><title type='text'>MongoDB the Definitive Guide by Kristina Chodrow and Michael Dirolf</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://oreilly.com/catalog/0636920001096/"&gt;&lt;img style="float:left; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 180px; height: 236px;" src="http://covers.oreilly.com/images/0636920001096/cat.gif" border="0" alt="" /&gt;&lt;/a&gt;&lt;br /&gt;The kind folks at O'Reilly sent me a fantastic book about MongoDB. This was a great read since it’s suited for people who do Operations and Development and Performance tuning (me). I've been using Cassandra for quite some time now (months lol) and the thing that has irritated me about Cassandra is the documentation for it. Cassandra documentation sucks, its hard to speed up on the internals. This MongoDB book is written by the most active participants that are developing MongoDB and the knowledge shows. What I like is it starts out on how to quickly get it up, add/get/update data to the DB. Then progresses to more advance topics-that talk about GridFS and MongoDB drivers. Personally I would like to see more elaboration of this facet in terms of motivation of why do this, what the win is and how it fits into the "Fast by Default" mantra. Each step is organized perfectly, and detailed with nice graphics that illustrate the document store or the flow of data from a systems view. When looking at the documentation on mongodb.org I see the same sort of clarity in this book. Comparing other NoSQL information, I do not see this transparency, which is rather frustrating because the learning curve is much larger. I'm so impressed with the info, and test results around the web that I'm moving to add this to my environment. Does this mean I'll get rid of my current Cassandra deployment? 
Probably not since its working great for my needs now.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Overall, great book, great info, intelligently presented with a straightforward explanation of how MongoDB works.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5937565851086306627?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5937565851086306627/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5937565851086306627' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5937565851086306627'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5937565851086306627'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/09/mongodb-definitive-guide-by-kristina.html' title='MongoDB the Definitive Guide by Kristina Chodrow and Michael Dirolf'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4065453669006375343</id><published>2010-09-03T15:42:00.000-07:00</published><updated>2010-09-14T14:08:17.086-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Cassandra'/><category scheme='http://www.blogger.com/atom/ns#' term='maatkit like'/><title type='text'>Cassandra and Ganglia</title><content type='html'>&lt;a href="http://www.flickr.com/photos/dathan/4955551654/" 
title="cassandra_tpstats_row_read_stage_completed by dathan, on Flickr"&gt;&lt;img src="http://farm5.static.flickr.com/4106/4955551654_58e8b73a44.jpg" width="500" height="320" alt="cassandra_tpstats_row_read_stage_completed" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;I finally got some time to do some house cleaning. One of my nagging low-hanging fruit jobs was to stop using jconsole as my monitor. I created a ganglia script to graph what is above. The image illustrated above I am showing all the Cassandra servers and their total row read stages completed in the last hour as a gauge.  In essence I am graphing the delta of the change between ganglia script runs.&lt;br /&gt;&lt;br /&gt;How I have it set up is:&lt;br /&gt;&lt;br /&gt;All data exposed by JMX to produce tpstats and cfstats is graphed via ganglia. The pattern for each graph is as follows&lt;br /&gt;&lt;br /&gt;cass_{stat_class}_{key}&lt;br /&gt;&lt;br /&gt;stat_class - tpc, tpp, tpa means complete, pending, active respectively&lt;br /&gt;key - would be message deserialization for instance.&lt;br /&gt;&lt;br /&gt;For column family stats I graph the keyspace stats as well as the specific column family stats exposed by cfstats. 
For instance below:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.flickr.com/photos/dathan/4955539434/" title="Cassandra cfstats with ganglia by dathan, on Flickr"&gt;&lt;img src="http://farm5.static.flickr.com/4105/4955539434_4fd19b1acd.jpg" width="382" height="500" alt="Cassandra cfstats with ganglia" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;If you’re interested in the scripts I'll send it to you or put it up on code.google.com, its written in perl OOP perl and takes the same approach of packaging that maatkit tool kit for mySQL by Xarb and crew does (puts all the "classes" in the file as the application).&lt;br /&gt;&lt;br /&gt;GmetricDelegate is the parent package&lt;br /&gt;GmetricCassandra extends GmetricDelegate and overloads getData as well as defines what is an absolute stats vrs a gauge.&lt;br /&gt;&lt;br /&gt;As you can see the pattern I also have&lt;br /&gt;GmetricInnoDB&lt;br /&gt;GmetricMySQL&lt;br /&gt;&lt;br /&gt;and so on.&lt;br /&gt;&lt;br /&gt;then on each server I run&lt;br /&gt;&lt;br /&gt;/usr/bin/perl -w /home/scripts/ganglia_gmetric.pl --module=GmetricCassandra&lt;br /&gt;&lt;br /&gt;this then talks to Ganglia through gmetric to report the stats.&lt;br /&gt;&lt;br /&gt;&lt;b&gt;Update:&lt;/b&gt; I uploaded an alpha version to http://code.google.com/p/gangliastats/ - be warned sparse comments I'll have another check in with documentation soon.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4065453669006375343?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4065453669006375343/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4065453669006375343' title='4 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/31421954/posts/default/4065453669006375343'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4065453669006375343'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/09/cassandra-and-ganglia.html' title='Cassandra and Ganglia'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://farm5.static.flickr.com/4106/4955551654_58e8b73a44_t.jpg' height='72' width='72'/><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8760314169545999688</id><published>2010-08-18T15:28:00.000-07:00</published><updated>2010-08-18T16:19:52.140-07:00</updated><title type='text'>Review of MySQL High Availability by Charles Bell, Mats Kindahl and Lars Thalmann</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://oreilly.com/catalog/9780596807290"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 180px; height: 236px;" src="http://covers.oreilly.com/images/9780596807290/cat.gif" border="0" alt="" /&gt;&lt;/a&gt;&lt;br /&gt;The kind folks at O'Reilly sent me MySQL High Availability by Charles Bell, Mats Kindahl and Lars Thalmann . In summary, the book is awesome. Personally I didn't think that Replication was enough of a feature in mySQL to fill up 500+ pages about it, BUT, they did and not with fillers or info that is NOT actually important to you as the end designer. 
In &lt;a href="http://mysqlha.blogspot.com/"&gt;Mark Callaghan&lt;/a&gt;'s foreword he states that this book "adequately" explains MySQL replication, this is a huge low-ball, I dare say if you did not look at the replication code (prior to row-based replication there was not a lot of it) your questions can be answered with this book. There are three parts to this book: Replication (roughly 50% of the book), Monitoring and Disaster Recovery, then finally High Availability Environments. Each part is well written and accurate. I have been using mySQL since before Replication existed. I have been using Replication since it was first released and have been to many talks about it, from the original author talks to Brian Aker’s talks to asking &lt;a href="http://jeremy.zawodny.com/"&gt;Jeremy Zawodny&lt;/a&gt; about it to giving my own talks about Scale-Out, replication, etc.  &lt;br /&gt;My experience with mySQL Replication is rather robust. I broke holes in it, found bugs with it, and even wrote a quick C client (when it was still statement based replication only on the 4.1 branch) that would act as a proxy to gather all the replication feeds of all boxes onto a single box (it was a proof of concept). The book goes into clear detail about the common approaches and use cases of replication scaling out the app's reads. It even talks about data sharding that I must say I am an expert at (done it for Flickr/RockYou, for friends etc). The examples are clear although I would not recommend deploying their examples verbatim. The reason is that you never want to mod based on the number of shards in your system, because if you add more shards you are going to have to move your data all around. Central dictionaries are perfect to control balance. They go into this, but not how to lock a user to migrate to a lightly loaded shard. 
The example that they have creates holes in the flow, race conditions as well as downtime to move data. This is a nitpick; the book is good.&lt;br /&gt;&lt;br /&gt;I do not want to give away the book, it is a good read and the quality is what you expect from O’Reilly. If you want to know about various replication techniques, replication shortcomings, building fail-over systems, and exploring other technologies that are comparable to replication, this is a great book for you. Oh and of note, they even go into exotic features that I rarely use in production and how that affects replication. Awesome.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8760314169545999688?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8760314169545999688/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8760314169545999688' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8760314169545999688'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8760314169545999688'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/08/review-of-mysql-high-availability-by.html' title='Review of MySQL High Availability by Charles Bell, Mats Kindahl and Lars Thalmann'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8880555644188705213</id><published>2010-07-08T12:58:00.000-07:00</published><updated>2010-07-08T15:36:25.926-07:00</updated><title type='text'>Upgrading Cassandra 0.5.1 to 0.6.3</title><content type='html'>Every month or so a node randomly dies&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;EQX root@cass01:/opt/cassandra/bin# ./nodeprobe -host localhost -port 8181 ring&lt;br /&gt;Address       Status     Load          Range                                      Ring&lt;br /&gt;                                       facebook_1301003235_1301003235             &lt;br /&gt;10.129.28.22  Down       15.77 GB      9ZehBzpHHwnxiPJU                           |&lt;--|&lt;br /&gt;10.129.28.23  Up         7.59 GB       facebook_100000471858343_1514390063        |   |&lt;br /&gt;10.129.28.14  Up         4.59 GB       facebook_100000846936312                   |   |&lt;br /&gt;10.129.28.20  Up         12.94 GB      facebook_1301003235_1301003235             |--&gt;|&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Trying to get info from the host, the reads timeout.&lt;br /&gt;java.net.SocketTimeoutException: Read timed out&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Doing an lsof -p on the java proc I see that it is holding open a bunch of sockets. So the node itself is hanging on something internal is my assumption.&lt;br /&gt;&lt;br /&gt;Looking at /var/log/cassandra/system.log I see that the last rotation happened Jun 8th over a month ago and no new log is being written to. THe issue is the node just died today. 
So this seems like a bug to me.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now since Cassandra does not tell me what the problem is, I assume that there is a bug in this version and searching Cassandra &lt;a href="https://issues.apache.org/jira/sr/jira.issueviews:searchrequest-printable/temp/SearchRequest.html?query=node+hung&amp;summary=true&amp;description=true&amp;pid=12310865&amp;fixfor=12314040&amp;tempMax=1000"&gt;Jira bug database&lt;/a&gt; I see that a lot of stuff is fixed as well as added. So I might as well upgrade.&lt;br /&gt;&lt;br /&gt;Before I upgrade I wanted to do research to see if anyone else has.  To my surprise there doesn't seem to be any blog talking about upgrading from &lt;a href="http://www.google.com/search?hl=en&amp;q=cassandra+upgrade+from+0.5+to+0.6.3&amp;btnG=Search&amp;aq=f&amp;aqi=&amp;aql=&amp;oq=&amp;gs_rfai="&gt;0.5 to 0.6.3&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;I know it's rather easy but there is some new stuff in 0.6.3 that is turned on by default: So let's see what changes in the conf&lt;br /&gt;&lt;br /&gt;diff /opt/cassandra/conf /opt/apache-cassandra-0.6.3/conf&lt;br /&gt;&lt;br /&gt;I see that in storage.xml there are some new XML attributes for the ColumnFamily tag such as RowsCached, new tags called &lt;a href="http://wiki.apache.org/cassandra/HintedHandoff"&gt;HintedHandoffEnabled&lt;/a&gt;, Authenticator, DiskAccessMode, &lt;a href="http://wiki.apache.org/cassandra/CassandraLimitations"&gt;RowWarningThresholdInMB&lt;/a&gt;.&lt;br /&gt;In addition to this I noticed that a lot of XML tags are missing. 
A rolling upgrade is just not possible and is mentioned in NEWS.txt&lt;br /&gt;&lt;br /&gt;Thus in my application I set this $GLOBALS['cfg']['disable_nosql_feature'] = 1; I have about 40 toggles to play with, a very helpful process to enable dynamically code with out breaking your site.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;now time for an upgrade without the service running:&lt;br /&gt;&lt;br /&gt;Steps:&lt;br /&gt;&lt;OL&gt;&lt;br /&gt;&lt;LI&gt; Shut down Cassandra: dsh -g cassandra "pkill java" # same thing as stop-server&lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; rpm -e cassandra-0.5.1 &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; rpm -ivh cassandra-0.6.3.rpm &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; /opt/cassandra/bin/cassandra &lt;/LI&gt;&lt;br /&gt;&lt;/OL&gt;&lt;br /&gt;&lt;br /&gt;Done. Note what the hell is cassandra-0.6.3.rpm, it's an rpm I created that has my storage-conf.xml&lt;br /&gt;log4j.properties&lt;br /&gt;cassandra.in.sh&lt;br /&gt;&lt;br /&gt;After Upgrading:&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;***************************************************************&lt;br /&gt;WARNING: ./nodeprobe is obsolete, use ./nodetool instead&lt;br /&gt;***************************************************************&lt;br /&gt;Address       Status     Load          Range                                      Ring&lt;br /&gt;                                       facebook_1301003235_1301003235             &lt;br /&gt;10.129.28.22  Up         11.75 GB      9ZehBzpHHwnxiPJU                           |&lt;--|&lt;br /&gt;10.129.28.23  Up         3.04 GB       facebook_100000471858343_1514390063        |   |&lt;br /&gt;10.129.28.14  Up         2.33 GB       facebook_100000846936312                   |   |&lt;br /&gt;10.129.28.20  Up         4.4 GB        facebook_1301003235_1301003235             |--&gt;|&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now what is left to do it change my ganglia scripts / nagios scripts to use nodetool instead of nodeprobe.&lt;div 
class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8880555644188705213?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8880555644188705213/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8880555644188705213' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8880555644188705213'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8880555644188705213'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/07/upgrading-cassandra-051-to-063.html' title='Upgrading Cassandra 0.5.1 to 0.6.3'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1663233179474927611</id><published>2010-06-25T01:02:00.000-07:00</published><updated>2010-06-25T02:19:02.477-07:00</updated><title type='text'>dsh and TABLE CHECKSUM</title><content type='html'>So running through some various tasks, I'm finally on the section of work where I can resurrect a script that finds inconsistent data between master-master pairs. &lt;br /&gt;&lt;br /&gt;Let's get a quick summary to find our problems to target the script at.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;dsh -ef 20 -w dbfacebook9b,dbfacebook9a -s ~/bin/execute_query.sh "FacebookShard \"CHECKSUM TABLE RollingUserLog\""&lt;br /&gt;dbfacebook9b: *************************** 1. 
row ***************************&lt;br /&gt;dbfacebook9b:    Table: FacebookShard.RollingUserLog&lt;br /&gt;dbfacebook9b: Checksum: 538386033&lt;br /&gt;dbfacebook9a: *************************** 1. row ***************************&lt;br /&gt;dbfacebook9a:    Table: FacebookShard.RollingUserLog&lt;br /&gt;dbfacebook9a: Checksum: 538386033&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;not bad.&lt;br /&gt;&lt;br /&gt;CHECKSUM TABLE uses a ACCUM algorithm to determine the table checksum. This gives a good fast snapshot in a shared lock mode so writes do not stop while doing a checksum (this is in the context of INNODB). RollingUserLog gets nearly 700 writes per sec per box (No alerts).&lt;br /&gt;&lt;br /&gt;But what if there is a problem&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;# dsh -ef 20 -w dbfacebook38b,dbfacebook38a -s ~/bin/execute_query.sh "FacebookShard \"CHECKSUM TABLE SimulatedProblem\""&lt;br /&gt;dbfacebook38b: *************************** 1. row ***************************&lt;br /&gt;dbfacebook38b:    Table: FacebookShard.SimulatedProblem&lt;br /&gt;dbfacebook38b: Checksum: 660032421&lt;br /&gt;dbfacebook38a: *************************** 1. row ***************************&lt;br /&gt;dbfacebook38a:    Table: FacebookShard.SimulatedProblem&lt;br /&gt;dbfacebook38a: Checksum: 2533654621&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Well let's see if there is a row count mismatch&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;# dsh -ef 20 -w dbfacebook38b,dbfacebook38a -s ~/bin/execute_query.sh "FacebookShard \"SELECT COUNT(*) FROM SimulatedProblem\""&lt;br /&gt;dbfacebook38b: *************************** 1. row ***************************&lt;br /&gt;dbfacebook38b: COUNT(*): 64358&lt;br /&gt;dbfacebook38a: *************************** 1. row ***************************&lt;br /&gt;dbfacebook38a: COUNT(*): 64358&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Ah so the data is inconsistent, we are not missing rows but 1 or more rows have different values. 
Now time to find it.&lt;br /&gt;&lt;br /&gt;To resurrect my script now that I know what table is messed up:&lt;br /&gt;&lt;br /&gt;Algorithm is this:&lt;br /&gt;&lt;br /&gt;open connection to both servers - fork&lt;br /&gt;compare the data by scanning the table via an index walk&lt;br /&gt;crc32/md5/hash the rows&lt;br /&gt;compare columns &lt;br /&gt;if (dbfacebook38b.hash != dbfacebook38a.hash)&lt;br /&gt;mark row and record primary key to track position&lt;br /&gt;&lt;br /&gt;print report&lt;br /&gt;&lt;br /&gt;But wait, why polish up my script when I can use a formal one that does the job great?&lt;br /&gt;&lt;br /&gt;@see&lt;br /&gt;&lt;br /&gt;mk-table-checksum&lt;br /&gt;mk-table-sync&lt;br /&gt;&lt;br /&gt;# fixes the issue&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;mk-table-sync h=dbfacebook38a,u=$USER,p=$PASS,D=FacebookShard,t=SimulatedProblem h=dbfacebook38b --execute --no-check-slave&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;maatkit is awesome. Nearly every problem that I have run into and built a script for has been formalized. 
I'm going to muster up some time and contribute to this toolkit like add my binary log rotate which makes sure that the slaves are caught up to the binary file that is being purged.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1663233179474927611?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1663233179474927611/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1663233179474927611' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1663233179474927611'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1663233179474927611'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/06/dsh-and-table-checksum.html' title='dsh and TABLE CHECKSUM'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5149479564300930805</id><published>2010-06-07T12:07:00.000-07:00</published><updated>2010-06-07T12:47:39.401-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='ndb'/><category scheme='http://www.blogger.com/atom/ns#' term='packt'/><category scheme='http://www.blogger.com/atom/ns#' term='replication'/><category scheme='http://www.blogger.com/atom/ns#' term='book'/><title type='text'>Review of High Availability MySQL Cookbook by Packt Publishing</title><content type='html'>&lt;a onblur="try 
{parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.packtpub.com/sites/default/files/imagecache/productview/9942_High%20Availability%20MySQL%20Cookbook.jpg"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 125px; height: 152px;" src="http://www.packtpub.com/sites/default/files/imagecache/productview/9942_High%20Availability%20MySQL%20Cookbook.jpg" border="0" alt="" /&gt;&lt;/a&gt;&lt;br /&gt;A few months ago, I reviewed MySQL Admin Cookbook. Today I am reviewing High &lt;a href="https://www.packtpub.com/high-availability-mysql-cookbook/book"&gt;Availability MySQL Cookbook from Packt Publishing by Alex Davies&lt;/a&gt;. Overall, I found the book to contain some good hidden Gems.&lt;br /&gt;&lt;br /&gt;The book is a mixture of MySQL Cluster (NDB), Replication schemes, some performance tuning, some minor kernel tweaking, and some more exotic approaches to common High Availability problems. Overall, I found this book very informative and a good read.&lt;br /&gt;Now the specifics, the book starts out on NDB and stays focused on this fact for about 60% of the book. The next 20% is on mySQL replication then about 10% of the book is on tweaking kernel, mysql, network settings to get the most out of the system. The last 10% is a mixture of uses of exotic systems such as GFS, Conga, ISCSI and how to use these shared storage techs with mySQL.&lt;br /&gt;&lt;br /&gt;High Availability MySQL Cookbook, is a good read, and jogged my memory on NDB (since I do not use it on a day-to-day basis). If you are looking for a good reference on how to get an HA system up then this is a good book for you. If you are looking for why to use NDB over mySQL multi-master replication, this is not a book for you. The “why-to-use-this-over-that” is not the scope of the book. The drawbacks and concerns of what technology to use are not a focus of this book, and should not be. 
It is assumed that the reader knows which direction to go in for the most part, or the book gives the reader enough information to set up an environment to see which way to go.&lt;br /&gt;&lt;br /&gt;In conclusion, I like this book. I think it is rather concise, and right to the point, which either gets you started into building HA systems or gives you a good reference for an existing HA environment.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5149479564300930805?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5149479564300930805/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5149479564300930805' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5149479564300930805'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5149479564300930805'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/06/review-of-high-availability-mysql.html' title='Review of High Availability MySQL Cookbook by Packt Publishing'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6322578509712935265</id><published>2010-04-29T13:43:00.001-07:00</published><updated>2010-04-29T13:43:18.048-07:00</updated><title type='text'>Mastering the art of indexing</title><content type='html'>Check out this SlideShare Presentation:  I'd have 
to say that this is the coolest presentation on Indexing and how it relates to INNODB. I have written on this subject many times in the past but this presentation covers all the bases and does a great job at explaining WHY you should do certain statements over others.&lt;div style="width:425px" id="__ss_3739976"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/matsunobu/more-mastering-the-art-of-indexing" title="More mastering the art of indexing"&gt;More mastering the art of indexing&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse3739976" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=moremasteringtheartofindexing-100415172610-phpapp02&amp;stripped_title=more-mastering-the-art-of-indexing" /&gt;&lt;param name="allowFullScreen" value="true"/&gt;&lt;param name="allowScriptAccess" value="always"/&gt;&lt;embed name="__sse3739976" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=moremasteringtheartofindexing-100415172610-phpapp02&amp;stripped_title=more-mastering-the-art-of-indexing" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/matsunobu"&gt;Yoshinori Matsunobu&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6322578509712935265?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6322578509712935265/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6322578509712935265' title='0 Comments'/><link 
rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6322578509712935265'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6322578509712935265'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/04/mastering-art-of-indexing.html' title='Mastering the art of indexing'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8072298254487913425</id><published>2010-04-13T14:52:00.000-07:00</published><updated>2010-04-15T17:47:13.432-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='review'/><title type='text'>Review of MySQL Admin Cookbook from PACKT Publishing</title><content type='html'>&lt;a href="https://www.packtpub.com/mysql-admin-cookbook/book"&gt;PACKT Publishing&lt;/a&gt; sent me titled "&lt;a href="http://www.amazon.com/MySQL-Admin-Cookbook-Daniel-Schneller/dp/1847197965/ref=sr_1_1?ie=UTF8&amp;s=books&amp;qid=1271203041&amp;sr=1-1"&gt;MySQL Admin Cookbook&lt;/a&gt;" to review and I told them that I would be brutally honest about it. They said cool and well here, we go.&lt;br /&gt;&lt;br /&gt;Overall, the book is cool if you are starting out in MySQL administration and want to get a box up and running. If you are looking to scale MySQL or make your application faster this is not the book for you. If you are worried about consistency and getting the most out of your hardware-this is not the book for you. If you are trying to figure out what the best index combination is-again-this is not the book for you. 
If you want to know how to add users, or set up replication, or dump a CSV format text file of data then this is the book for you. &lt;br /&gt; &lt;br /&gt;One thing that annoys me about this book is all of the GUI cut and paste screen shots. Explaining stuff with a GUI screen shot really sucks IMHO since by the time you read the book, the GUI has changed. I personally stick with command line interfaces or write my own GUI layouts for administration actions since I know what the various ADMIN commands do. Let me stress again that GUI explanations really go out of date fast and are only pertinent at the time the book is written. For instance if you ever used Eclipse, a common IDE for various languages (mainly Java), between Eclipse builds the GUI changes. The overall interface for the MySQL command line client has stayed the same since the very beginning. To be fair though the book does show some mySQL command line examples, like for handling NULLs but consistency is key to getting your ideas across.&lt;br /&gt;&lt;br /&gt;Another pet peeve of mine is the book has a tag line &lt;span style="font-style:italic;"&gt;99 great recipes for mastering MySQL configuration and administration&lt;/span&gt; yet I couldn't confirm 99 recipes since the book is not actually structured this way IMHO. It is structured in the format of "How to do it", "How it works", and "there's more..." for certain actions and there is just not enough meat for Mastering MySQL configurations – like what is a Star Replication Schema and how to do it? How do you rotate in new servers when in a circular MySQL config? Where is MySQL clustering? Why are file sorts so slow? How is MySQL using the disk subsystem with this config ... etc.&lt;br /&gt;&lt;br /&gt; In conclusion, would I recommend this book to readers? If you need a starting point to ask Google for some more complicated questions-this is a good start. 
For experienced administrators, no it is not for you.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8072298254487913425?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8072298254487913425/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8072298254487913425' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8072298254487913425'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8072298254487913425'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/04/review-of-mysql-admin-cookbook-from.html' title='Review of MySQL Admin Cookbook from PACKT Publishing'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4173777374413942729</id><published>2010-03-26T10:45:00.000-07:00</published><updated>2010-03-26T11:09:36.476-07:00</updated><title type='text'>Some kernel tweaks to aid Cassandra under a high concurrency environment</title><content type='html'>For the past couple of weeks I have been trouble shooting some Cassandra issues where data would not make it to Cassandra.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.flickr.com/photos/dathan/4464575619/" title="Graph of various tracked Exceptions by dathan, on Flickr"&gt;&lt;img src="http://farm5.static.flickr.com/4047/4464575619_c1f48240bd_b.jpg" 
width="1024" height="382" alt="Graph of various tracked Exceptions" /&gt;&lt;/a&gt; &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The image above graphs all the exceptions that are produced from Cassandra. The two big lines are&lt;br /&gt;&lt;br /&gt;Transport Exceptions (te) - meaning that Cassandra could not answer the request think of this as MAX Connection errors in mySQL.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Unavailable Exceptions (ue) - meaning that Cassandra could answer the request but the "storage engine" cannot do anything with it because its busy doing something like communicating with other nodes or maintenance like a node cleanup.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So how did I get the graph to drop to 0? After looking at the error logs, I saw that Cassandra was getting flooded with SYN Requests and the kernel thought that it was a SYN Flood and did this&lt;br /&gt;&lt;br /&gt;&lt;i&gt;possible SYN flooding on port 9160. Sending cookies.&lt;/i&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;To stop this the puppet profile was changed to have&lt;br /&gt;&lt;br /&gt;sysctl -w net.ipv4.tcp_max_syn_backlog=4096&lt;br /&gt;sysctl -w net.ipv4.tcp_syncookies=0&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Next looking into the Cassandra log which I defined to exist in /var/log/cassandra/system.log&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;WARN [TCP Selector Manager] 2010-03-26 02:46:31,619 TcpConnectionHandler.java (line 53) Exception was generated at : 03/26/2010 02:&lt;br /&gt;Too many open files&lt;br /&gt;java.io.IOException: Too many open files&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Then noticed that &lt;br /&gt;ulimit -n == 1024&lt;br /&gt;&lt;br /&gt;thus I changed&lt;br /&gt;/etc/security/limits.conf so that It's at a server setting by adding this:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;*                                         -              nofile                   8000&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now my Transport Exceptions and 
Unavailable Exceptions are gone and data is being written to it consistently.&lt;br /&gt;&lt;br /&gt;There are many other ways of doing the same thing, I could have modified my init script or did some other stuff but I choose this way. Default Distros set kernel and limits fields too low: settings for desktop levels.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4173777374413942729?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4173777374413942729/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4173777374413942729' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4173777374413942729'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4173777374413942729'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/03/some-kernel-tweaks-to-aid-cassandra.html' title='Some kernel tweaks to aid Cassandra under a high concurrency environment'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://farm5.static.flickr.com/4047/4464575619_c1f48240bd_t.jpg' height='72' width='72'/><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7150075960389202043</id><published>2010-03-23T18:36:00.000-07:00</published><updated>2010-03-23T20:32:54.831-07:00</updated><category 
scheme='http://www.blogger.com/atom/ns#' term='Cassandra'/><category scheme='http://www.blogger.com/atom/ns#' term='NoSQL'/><category scheme='http://www.blogger.com/atom/ns#' term='throughput'/><title type='text'>Cassandra is my NoSQL solution but..</title><content type='html'>In the past few months, I have tested many NoSQL solutions. Redis, MongoDB, HBase yet Cassandra is the Column Store DB I picked because of its speed (on writes), reliability, built in feature set that makes it multi-datacenter aware. The one other personal reward for Cassandra is it is written in Java. I like reading and writing in Java more than C++ although it really does not matter for me personally in the end.&lt;br /&gt;&lt;br /&gt;Let us talk about the reason why I am introducing Cassandra into my infrastructure and some of its drawbacks I have noticed so far.&lt;br /&gt;&lt;br /&gt;Why it is being introduced:&lt;br /&gt;We have a feature where we record every single click for 50 million Monthly Active Users (real-time) and storing this in mySQL is just waste of semi-good hardware for data that is only looked at for the past 24 hours. Over the course of some time (couple of months) more than 3 billion rows accumulated, which translated into a 3.5 TB distributed INNODB datafile. So purging/archiving this data just sucks.&lt;br /&gt;&lt;br /&gt;Now introducing user clicks into Cassandra was rather easy. I researched various sites, asked my Digg buddy and then figured out the rest. Within two days I was up and running with Cassandra and had a great understanding about Column Families and SuperColumns.&lt;br /&gt;&lt;br /&gt;Developing with a Cassandra Data layer:&lt;br /&gt;&lt;br /&gt;Now that the infrastructure is up, I needed to add a data model to /opt/cassandra/conf/storage.conf. 
The SQL that drove this functionality consisted of two main SQL statements.&lt;br /&gt;Add click&lt;br /&gt;Get a Range of Clicks&lt;br /&gt;&lt;br /&gt;For these operations, mySQL rarely takes more than 0.001 seconds (1 ms).&lt;br /&gt;&lt;br /&gt;Cassandra for writes is rather fast, but for reads, Getting Range Clicks&lt;br /&gt;i.e. using &lt;br /&gt;recv_get_slice&lt;br /&gt;&lt;br /&gt;it takes .02 seconds (20 ms).&lt;br /&gt;&lt;br /&gt;What does this mean? MySQL is a hell of a lot faster! Is it because of my CF design? No.&lt;br /&gt;For instance, take this slug (keys separated by a delimiter to make a distinct key name).&lt;br /&gt;&lt;br /&gt;{$clicker}_{$pageowner} =&gt; [ $object_id_clicked0 =&gt; $value,$ts, ... $object_id_clickedN =&gt; $value,$ts ]&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The slug says that clicker A, clicked on page B and what is stored are columns of distinct clicks - remember this is a column store DB.&lt;br /&gt;&lt;br /&gt;So &lt;br /&gt;{$clicker}_{$pageowner} == the KEY for the COLUMN $object_id_clicked with the value being a $value and the free extra value the $timestamp.&lt;br /&gt;&lt;br /&gt;Here is the php code&lt;br /&gt;&lt;code&gt;&lt;br /&gt;$columnPath = $this-&gt;getColumnPath($objectid);&lt;br /&gt;              $this-&gt;getCassandraConnect()-&gt;insert(self::KEYSPACE, $this-&gt;getKey($clicker, $pageOwner), $columnPath, $this-&gt;today_ts, microtime(true), ConsistencyLevel::ZERO);&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now I want a list of items clicked.&lt;br /&gt;&lt;code&gt;&lt;br /&gt;$data = $this-&gt;getCassandraConnect()-&gt;get_slice(self::KEYSPACE, $this-&gt;getKey($clicker, $pageOwner), $this-&gt;getColumnParent(), $this-&gt;getSlicePredicate(), ConsistencyLevel::ONE);&lt;br /&gt;&lt;/code&gt;&lt;br /&gt;&lt;br /&gt;This says give me the last N logged clicks that the clicker A made for page owner B. 
This is a hash lookup (Big-O(1)) but a sorted list of columns (O(nlogn)) and return the last N elements.&lt;br /&gt;&lt;br /&gt;Why is Cassandra sooo slow on reads. Is it because my memory config is not enough?&lt;br /&gt;No. 7 GB of data is allocated for data that fits in memory ( for now ).&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Is it because my disk is saturated?&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;avg-cpu:  %user   %nice %system %iowait  %steal   %idle&lt;br /&gt;          12.21    0.00    2.85    0.48    0.00   84.47&lt;br /&gt;&lt;br /&gt;Device:         rrqm/s   wrqm/s   r/s   w/s   rsec/s   wsec/s avgrq-sz avgqu-sz   await  svctm  %util&lt;br /&gt;sda               0.01    14.01  0.23  6.61    22.25   165.03    27.34     0.33   47.57   7.35   5.04&lt;br /&gt;sda1              0.00     0.00  0.00  0.00     0.00     0.00    20.83     0.00    7.74   6.40   0.00&lt;br /&gt;sda2              0.01    14.01  0.23  6.61    22.25   165.03    27.34     0.33   47.57   7.35   5.04&lt;br /&gt;dm-0              0.00     0.00  0.24 20.63    22.25   165.03     8.97     0.72   34.27   2.41   5.04&lt;br /&gt;dm-1              0.00     0.00  0.00  0.00     0.00     0.00     8.00     0.00    5.28   0.22   0.00&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;No its not.&lt;br /&gt;&lt;br /&gt;My &lt;span style="font-weight:bold;"&gt;hunch&lt;/span&gt; is the slowdown is a combo of the low thread read pool and in the inherent sorting and there doesn't seem to be a way to turn it off, so without digging in the code I couldn't say (I will be able to once I get an understanding of the code layout). &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In conclusion, I like Cassandra, it’s very fast in writes, slow (for my taste but fast enough) in reads and what takes 2 lines of SQL takes 250 lines of PHP code interfacing with the Cassandra.thrift suite. I am going to use it still because it is good enough and I love the built in HA of it. 
Additionally the performance is improving very quickly.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7150075960389202043?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7150075960389202043/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7150075960389202043' title='12 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7150075960389202043'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7150075960389202043'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/03/cassandra-is-my-nosql-solution-but.html' title='Cassandra is my NoSQL solution but..'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>12</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1594478753725575091</id><published>2010-03-10T15:56:00.000-08:00</published><updated>2010-03-10T15:59:58.070-08:00</updated><title type='text'>Talking at the University of Utah</title><content type='html'>Giving a talk at the University of Utah on everything from scaling, clustering, mysql, mysql internals, noSQL (Cassandra) to how to manage all this stuff. 
If you are there at University I'm bringing some Swag!&lt;br /&gt;&lt;br /&gt;Also I will upload the slides and put them here.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1594478753725575091?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1594478753725575091/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1594478753725575091' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1594478753725575091'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1594478753725575091'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/03/talking-at-university-of-utah.html' title='Talking at the University of Utah'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3344119825973531885</id><published>2010-02-25T11:01:00.000-08:00</published><updated>2010-02-25T11:05:37.077-08:00</updated><title type='text'>NoSQL explained to DBAs</title><content type='html'>&lt;a href="http://krow.livejournal.com/684898.html"&gt;Brian Aker&lt;/a&gt;, a brilliant helpful duder, who I learn a lot from. Gives a great talk about what is NoSQL explained in a way for database guys. 
I warn you, there are some points in this video where you can't hear Brian due to the audience "participation" but you should get the content.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;object width="425" height="344"&gt;&lt;param name="movie" value="http://www.youtube.com/v/LhnGarRsKnA&amp;hl=en_US&amp;fs=1&amp;"&gt;&lt;/param&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;/param&gt;&lt;param name="allowscriptaccess" value="always"&gt;&lt;/param&gt;&lt;embed src="http://www.youtube.com/v/LhnGarRsKnA&amp;hl=en_US&amp;fs=1&amp;" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="344"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3344119825973531885?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3344119825973531885/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3344119825973531885' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3344119825973531885'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3344119825973531885'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/02/nosql-explained-to-dbas.html' title='NoSQL explained to DBAs'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3160215463006945464</id><published>2010-02-04T11:24:00.001-08:00</published><updated>2010-02-04T12:12:50.607-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='maintenance'/><category scheme='http://www.blogger.com/atom/ns#' term='INNODB'/><title type='text'>innodb_file_per_table, shrinking table spaces and the data dictionary</title><content type='html'>INNODB has some irritating gotchas that make disk space management hard. In 2002ish, INNODB added innodb_file_per_table to get around a lot of these issues, but it does not fix everything.&lt;br /&gt;&lt;br /&gt;If you are running innodb_file_per_table, you will notice in your database directory &lt;br /&gt;&lt;UL&gt;&lt;br /&gt;&lt;LI&gt;db.opt - database characteristics file. &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; tablename.frm - the table structure. &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; tablename.ibd - the actual innodb table space file &lt;/LI&gt;&lt;br /&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;Imagine that you have a table with 10 million rows and you delete say 5 million rows in multiple chunks around 400K chunks, because deletes are slow. Next, you notice that the table space file did not shrink. So what do you do? 
OPTIMIZE tablename, tada all the wasted space is reclaimed, but here is the PROBLEM: the ibdata file grew!&lt;br /&gt;&lt;b&gt;&lt;br /&gt;ibdata stores all of the UNDO LOGS thus GROWS due to the deletes and space is never reclaimed.&lt;br /&gt;&lt;/b&gt;&lt;br /&gt;&lt;br /&gt;Ok, let's try copying the tablespace file to another directory and re-import the tablespace file after wiping the data dictionary (ibdata).&lt;br /&gt;&lt;br /&gt;For instance&lt;br /&gt;/etc/init.d/mysql stop&lt;br /&gt;cd /var/lib/mysql/DB&lt;br /&gt;cp * /tmp/hold&lt;br /&gt;rm /var/lib/mysql/ib*&lt;br /&gt;cp /tmp/hold/* /var/lib/mysql/DB&lt;br /&gt;/etc/init.d/mysql start // create the ibdata file&lt;br /&gt;&lt;br /&gt;ALTER TABLE tablename IMPORT TABLESPACE&lt;br /&gt;ERROR 1146 (42S02): Table 'DB.tablename' does not exist&lt;br /&gt;&lt;br /&gt;Really, there is nothing that you can do to force innodb to purge the undo logs in the ibdata file without a full dump of the data to text and then reimport the data.&lt;br /&gt;&lt;br /&gt;So here are the steps to shrink all table spaces and the ibdata file&lt;br /&gt;&lt;br /&gt;mysqldump --all-databases (or use mk-parallel-dump)&lt;br /&gt;stop mysql&lt;br /&gt;rm -f /var/lib/mysql/ib* /var/lib/mysql/DB/*&lt;br /&gt;start mysql&lt;br /&gt;mysqlimport (or use mk-parallel-restore)&lt;br /&gt;&lt;br /&gt;Takes a bunch of time but there is no other recourse. 
If you know of another way please share :)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3160215463006945464?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3160215463006945464/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3160215463006945464' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3160215463006945464'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3160215463006945464'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/02/innodbfilepertable-shrinking-table.html' title='innodb_file_per_table, shrinking table spaces and the data dictionary'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7904574305340113837</id><published>2010-01-08T16:21:00.000-08:00</published><updated>2010-02-16T12:07:41.604-08:00</updated><title type='text'>Scaling to 200K Transactions per Second with Open Source - MySQL, Java, curl, PHP</title><content type='html'>I'll be giving a web presentation Jan. 19th 2010. 
If you would like to check it out please do!&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.mysql.com/news-and-events/web-seminars/display-475.html" alt="click me"&gt;Scaling to 200K Transactions per Second with Open Source - MySQL, Java, curl, PHP&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I thought it went well. I will post the link to the slides here :)&lt;br /&gt;&lt;br /&gt;&lt;a title="View Scaling to 200K Transactions per Second with Open Source - MySQL, Java, curl, PHP on Scribd" href="http://www.scribd.com/doc/26955045/Scaling-to-200K-Transactions-per-Second-with-Open-Source-MySQL-Java-curl-PHP" style="margin: 12px auto 6px auto; font-family: Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;"&gt;Scaling to 200K Transactions per Second with Open Source - MySQL, Java, curl, PHP&lt;/a&gt; &lt;object id="doc_571268112673426" name="doc_571268112673426" height="600" width="100%" type="application/x-shockwave-flash" data="http://d1.scribdassets.com/ScribdViewer.swf" style="outline:none;" &gt;  &lt;param name="movie" value="http://d1.scribdassets.com/ScribdViewer.swf"&gt;  &lt;param name="wmode" value="opaque"&gt;   &lt;param name="bgcolor" value="#ffffff"&gt;   &lt;param name="allowFullScreen" value="true"&gt;   &lt;param name="allowScriptAccess" value="always"&gt;   &lt;param name="FlashVars" value="document_id=26955045&amp;access_key=key-2cb93mz5nrq8armj4zw0&amp;page=1&amp;viewMode=slideshow"&gt;   &lt;embed id="doc_571268112673426" name="doc_571268112673426" src="http://d1.scribdassets.com/ScribdViewer.swf?document_id=26955045&amp;access_key=key-2cb93mz5nrq8armj4zw0&amp;page=1&amp;viewMode=slideshow" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" height="600" width="100%" wmode="opaque" 
bgcolor="#ffffff"&gt;&lt;/embed&gt;  &lt;/object&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7904574305340113837?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7904574305340113837/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7904574305340113837' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7904574305340113837'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7904574305340113837'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2010/01/scaling-to-20k-transactions-per-second.html' title='Scaling to 200K Transactions per Second with Open Source - MySQL, Java, curl, PHP'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4709619962141971609</id><published>2009-11-09T16:58:00.000-08:00</published><updated>2009-11-10T12:43:24.549-08:00</updated><title type='text'>Asynchronous Queries verses Synchronous  Queries</title><content type='html'>In a procedural language without the use of threads (or Inter Process Communication via forks), to execute I/O requests they are done one after another. 
Synchronous Queries produce at best a Big-O of N such that N is an element of I/O communication (queries) and N equals the number of queries needed to achieve the requested dataset.&lt;br /&gt;With IPC or threads we can speed up common O(N) problems to reduce the N with parallelism; it's still functionally a O(N) yet from a single instance point of view N is much less because threads (IPC) takes that Serial computing component and executes the code in parallel. To better explain what I am talking about let's look at some PHP code:&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;foreach($friends as $friend){&lt;br /&gt;     $data[] = getMySQLData(“SELECT * FROM AccountData WHERE userid = $friend”);&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;The Primary key for the AccountData table is userid. Assuming that you have 5000 friends, the query has to be executed 5000 times.&lt;br /&gt;We can reduce the O(N) and change it to a O(nlogn) (Binary Tree - doesn't take into account other factors) by switching the query to&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;$data = getMySQLData(“SELECT * FROM AccountData WHERE userid IN (….)”);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;We just sped up the retrieval of the data significantly, yet we just introduced a bottleneck on the datalayer. Our architecture requires that the data is located in a single location.&lt;br /&gt;&lt;br /&gt;What if AccountData’s data is spread across many servers federated by userid? This means that userid belongs to a server, so the server contains a shard of the AccountData’s Data.&lt;br /&gt;&lt;br /&gt;Now we are back to a O(N) where each query needs to be executed on the corresponding shard. The logical next step is to group queries per shard and run across them all. 
For instance&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;$multiShardIDs = $genericShard-&gt;getMultipleShardIDs($objIds);&lt;br /&gt;foreach ($multiShardIDs as $shardID =&gt; $shardUserIDs) {&lt;br /&gt;           if (stripos($orgQuery, " WHERE ") !== false){&lt;br /&gt;               $query = $orgQuery." AND {$column} IN (".implode(',', $shardUserIDs).") ";&lt;br /&gt;           }               &lt;br /&gt;           else{&lt;br /&gt;               $query = $orgQuery." WHERE {$column} IN (".implode(',', $shardUserIDs).") ";               &lt;br /&gt;           }&lt;br /&gt;&lt;br /&gt;           $shard_to_sql[$shardID] = $query;&lt;br /&gt;&lt;br /&gt;           .... more stuff ....&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Yet this is still a O(N) its just that N is smaller. Each query is still executed serially. &lt;br /&gt;Let’s look at some stats of synchronous queries of SELECT 1; This query is executed across 35 shards and the timings are from PHP point of view.&lt;br /&gt;&lt;br /&gt;&lt;table border=1&gt;&lt;tr&gt;&lt;th&gt;Field&lt;/th&gt;&lt;th&gt;End Value&lt;/th&gt;&lt;th&gt;Start Value&lt;/th&gt;&lt;th&gt;Delta&lt;/th&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_oublock&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_inblock&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_msgsnd&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_msgrcv&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_maxrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_ixrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_idrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 
0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_minflt&lt;/td&gt;&lt;td&gt;9872&lt;/td&gt;&lt;td&gt;9865&lt;/td&gt;&lt;td&gt; 7&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_majflt&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nsignals&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nvcsw&lt;/td&gt;&lt;td&gt;11344&lt;/td&gt;&lt;td&gt;11114&lt;/td&gt;&lt;td&gt; 230&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nivcsw&lt;/td&gt;&lt;td&gt;977&lt;/td&gt;&lt;td&gt;968&lt;/td&gt;&lt;td&gt; 9&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nswap&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_utime.tv_usec&lt;/td&gt;&lt;td&gt;865054&lt;/td&gt;&lt;td&gt;849053&lt;/td&gt;&lt;td&gt; 16001&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_utime.tv_sec&lt;/td&gt;&lt;td&gt;16&lt;/td&gt;&lt;td&gt;16&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_stime.tv_usec&lt;/td&gt;&lt;td&gt;556097&lt;/td&gt;&lt;td&gt;552097&lt;/td&gt;&lt;td&gt; 4000&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_stime.tv_sec&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt;1&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=3&gt;Total Execution Time&lt;/td&gt;&lt;td&gt;0.18323707580566&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;As you can see, to execute this from PHP it took 100 ms, 100s pages reclaimed and 200s voluntary context switches to query 35 servers.&lt;br /&gt;&lt;br /&gt;Now let’s look at Asynchronous execution of SELECT 1; // the query generation is from PHP yet the execution is performed on a server that executes the query in parallel&lt;br /&gt;&lt;table border=1&gt;&lt;tr&gt;&lt;th&gt;Field&lt;/th&gt;&lt;th&gt;End Value&lt;/th&gt;&lt;th&gt;Start 
Value&lt;/th&gt;&lt;th&gt;Delta&lt;/th&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_oublock&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_inblock&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_msgsnd&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_msgrcv&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_maxrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_ixrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_idrss&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_minflt&lt;/td&gt;&lt;td&gt;9131&lt;/td&gt;&lt;td&gt;9121&lt;/td&gt;&lt;td&gt; 10&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_majflt&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nsignals&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nvcsw&lt;/td&gt;&lt;td&gt;3891&lt;/td&gt;&lt;td&gt;3889&lt;/td&gt;&lt;td&gt; 2&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nivcsw&lt;/td&gt;&lt;td&gt;290&lt;/td&gt;&lt;td&gt;290&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_nswap&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_utime.tv_usec&lt;/td&gt;&lt;td&gt;596287&lt;/td&gt;&lt;td&gt;596287&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_utime.tv_sec&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt;4&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_stime.tv_usec&lt;/td&gt;&lt;td&gt;460028&lt;/td&gt;&lt;td&gt;460028&lt;/td&gt;&lt;td&gt; 
0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;ru_stime.tv_sec&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt;0&lt;/td&gt;&lt;td&gt; 0&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=3&gt;Total Execution Time&lt;/td&gt;&lt;td&gt;0.019363880157471&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;As you can see from the table above executing the query asynchronously produced results with less context switching, fewer pages reclaimed and almost 10 times execution improvement over the synchronous query counterpart.&lt;br /&gt;How is the asynchronous query executed? Let's take a look at the figure below.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.flickr.com/photos/dathan/4090681457/" title="Async by dathan, on Flickr"&gt;&lt;img src="http://farm3.static.flickr.com/2731/4090681457_150aeb59fe.jpg" alt="Async" height="361" width="500" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So a user comes through the firewall / load balancer with a HTTP Request to the www pool that runs PHP. PHP now makes a CURL request to the Async Shard Servers (through a LB same LB different PORT). The HTTP Request to the Async Shard Server contains the SQL we wish to execute. The Async Shard Servers has a thread per shard and executes the request in parallel. The results are merged and sent to the calling CURL process via JSON. The returned JSON is then converted into a PHP object. This is a typical three-tier environment.&lt;br /&gt;&lt;br /&gt;When having to query multiple servers, using an Asynchronous Tier is dramatically faster; in fact it's as fast as the slowest server. This is the main sticking point of why asynchronous queries are faster than synchronous queries (in this context) since the total execution time for serial queries is the SUM of all the query execution.&lt;br /&gt;&lt;br /&gt;The current version of the server is used for Friend Query execution across the datalayer. 
Its been solid for a few months now, and I'm currently getting permission to release it as an Open Source Product. The features this server contains:&lt;br /&gt;&lt;UL&gt;&lt;br /&gt;&lt;LI&gt; Lightweight &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; CPU bounded &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Scales Linearly &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; A Timer Thread to keep the database config up to date in memory and fetching the config from PHP so if PHP changes connections to the shards so does Java &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Uses Java-6 Executor Service &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Merges the result set prior to sending it to the calling process &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Communicates via JSON &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Uses MySQL Connector/J &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Supports a high concurrency &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; Optimized thread usage &lt;/LI&gt;&lt;br /&gt;&lt;/UL&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4709619962141971609?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4709619962141971609/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4709619962141971609' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4709619962141971609'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4709619962141971609'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/11/asynchronous-queries-verses-synchronous.html' title='Asynchronous Queries verses Synchronous  Queries'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://farm3.static.flickr.com/2731/4090681457_150aeb59fe_t.jpg' height='72' width='72'/><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3230876226833790632</id><published>2009-10-09T15:34:00.001-07:00</published><updated>2009-10-12T11:23:11.861-07:00</updated><title type='text'>Steps I take before upgrading mysql</title><content type='html'>I am not a fan of upgrading mySQL unless I need to. I am of the mind if it is not broke don't fix it, but when I do upgrade I follow these general steps.&lt;br /&gt;&lt;br /&gt;If I have run into a mySQL bug, I look to see if that bug is fixed by searching the mySQL bug database.&lt;br /&gt;&lt;br /&gt;If I've notice a performance bottleneck, I look to see if the performance bottleneck has been fixed by searching the same database.&lt;br /&gt;&lt;br /&gt;I will NOT upgrade to the latest and greatest version of mySQL (5.4) I stay within my branch (5.0).&lt;br /&gt;&lt;br /&gt;These are my three general motivations that drive my upgrade decisions. Anytime I upgrade I also make a list of things that might affect my environment for the stuff I use.&lt;br /&gt;&lt;br /&gt;&lt;OL&gt;Here are my steps:&lt;br /&gt;&lt;LI&gt; Check the &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-x.html"&gt;change log &lt;/a&gt;&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Ignore all the NDB changes... I don't use it and that's the majority of fixes. This is also, why I do not use it.
&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; List the changes that will affect the production environment&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Deploy the version that I picked on a few servers running my original config&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Do data corruption tests (make sure my checksum scripts return the same data)&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Verify that the problem I'm trying to fix is fixed&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Deploy to more boxes&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Let the new server bake for a period of no less than a week&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; Deploy everyplace&lt;br /&gt;&lt;br /&gt;&lt;/OL&gt;&lt;br /&gt;&lt;br /&gt;So now, I'm upgrading from 5.0.56 to 5.0.86. What I'm trying to fix is mysql memory overhead at high levels of ram.&lt;br /&gt;&lt;br /&gt;For instance, I have a slew of 48GB boxes. I set the bufferpool to 40GB; the OS uses 1 GB of memory (roughly) leaving an overhead of 7GB for the system cache and various spikes of sort buffers. Over a period, I see that mySQL will consume and hold onto 47GB of memory for an unknown reason even with some tight my.cnf settings. (I'm certain they are tight since I know what each buffer does). Therefore, testing some later versions of mySQL we found that these later versions do not grow past the settings defined yet performs the same.&lt;br /&gt;&lt;br /&gt;Next, since I decided that upgrading is a good solution, now it’s time to list all the changes that &lt;i&gt;fixes&lt;/i&gt; things.&lt;br /&gt;&lt;br /&gt;&lt;UL&gt;&lt;br /&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-58.html"&gt;5.0.58&lt;/a&gt; - &lt;a href="http://bugs.mysql.com/32149"&gt;INNODB performance fix&lt;/a&gt;&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-60.html"&gt;5.0.60&lt;/a&gt; - various problems that I should be affected by but havn't noticed so it’s fair to assume that said problems were introduced after my build. 
&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-62.html"&gt;5.0.62&lt;/a&gt; - nothing major noticed the sp releases that's why I wait.&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-64.html"&gt;5.0.64&lt;/a&gt; - nothing major&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-66.html"&gt;5.0.66&lt;/a&gt; - security fixes and fixes to fix the bugs introduced from this build.&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-67.html"&gt;5.0.67&lt;/a&gt; - two INNODB performance fixes and crash bug fixes.&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-68.html"&gt;5.0.68&lt;/a&gt; - changes show status and fixes an innodb crash bug.&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-70.html"&gt;5.0.70&lt;/a&gt; - fix another INNODB crash bug and security fixes&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-72.html"&gt;5.0.72&lt;/a&gt; - more general bug fixes&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-74.html"&gt;5.0.74&lt;/a&gt; - more stuff I don't care about&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-75.html"&gt;5.0.75&lt;/a&gt; - stuff given to Enterprise users now in community&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-76.html"&gt;5.0.76&lt;/a&gt; - more bug fixes that I do not need&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-78.html"&gt;5.0.78&lt;/a&gt; - more bug fixes I do not care about (run MS Access on windows not mySQL)&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a 
href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-80.html"&gt;5.0.80&lt;/a&gt; - problem with error messages for concurrency limits that caused an assert failure&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-82sp1.html"&gt;5.0.82&lt;/a&gt; - Fixes to fix fixes for this build. &lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-83.html"&gt;5.0.83&lt;/a&gt; - more minor fixes that I don't seem to have a problem from&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-84.html"&gt;5.0.84&lt;/a&gt; - more bug fixes for INNODB and latches&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-85.html"&gt;5.0.85&lt;/a&gt; - looks like windows fixes&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt; &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/news-5-0-86.html"&gt;5.0.86&lt;/a&gt; - fixes that I'm not having problems with&lt;br /&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;Therefore, overall, upgrading should give me a boost in performance. My own internal testing shows somewhat tighter memory usage; even though this is not fixed explicitly, the product has matured overall, so I attribute the reduction in memory to that.
&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3230876226833790632?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3230876226833790632/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3230876226833790632' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3230876226833790632'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3230876226833790632'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/10/steps-i-take-before-upgrading-mysql.html' title='Steps I take before upgrading mysql'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2624506898194084838</id><published>2009-09-09T11:30:00.001-07:00</published><updated>2009-09-09T11:59:17.169-07:00</updated><title type='text'>Nagios Event Handlers - Love them</title><content type='html'>What is Nagios? &lt;a href="http://www.nagios.org/"&gt;Nagios&lt;/a&gt; IMHO is the best Open Source monitoring system out there. It supports hosts checks, a level to determine on a host level if a box is considered "up". It supports service check, a level to determine if a particular service such as mySQL is up. It has features to log all events to a flat file or to a DB. 
It can notify you when a service is in a warning state, error state or unknown state.&lt;br /&gt;&lt;br /&gt;For the purpose of this article, I am going to talk about handling events such as a clearing up swap.&lt;br /&gt;&lt;br /&gt;First, let us look at some configuration of Nagios. We are going to define a command, then service acting on that command. Let us assume that the nagios install is in /usr/local/nagios.&lt;br /&gt;&lt;br /&gt;Therefore, in /usr/local/nagios/ a few configuration files are key:&lt;br /&gt;- /usr/local/nagios/etc/objects/commands.cfg - the command file where the checks are defined&lt;br /&gt;- /usr/local/nagios/etc/hosts/*/hosts.cfg - the services file where the checks are defined for execution based on other directives in this file.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;A command:&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;# 'check_local_swap' command definition&lt;br /&gt;define command{&lt;br /&gt;        command_name    check_local_swap&lt;br /&gt;        command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;This says that check_local_swap executes check_swap with a warning threshold of $ARG1 and a critical threshold or $ARG2&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Next when defining a service for a host&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;define service{&lt;br /&gt;        use                             generic-service; Name of service template to use&lt;br /&gt;        host_name                       dbfacebook34b ; hostname&lt;br /&gt;        service_description             SYS:Swap ; what shows up in alerts&lt;br /&gt;        is_volatile                     0 &lt;br /&gt;        check_period                    24x7 ; threshold when to check (all the time)&lt;br /&gt;        max_check_attempts              4 ; threshold to check before marking state&lt;br /&gt;        event_handler                   handle-swap ; handle an event (another command)&lt;br /&gt;        normal_check_interval    
       5 ; in seconds&lt;br /&gt;        retry_check_interval            1 ; only try once before reporting the state&lt;br /&gt;        contact_groups                  itops ; contact group to send notifications to&lt;br /&gt;        notification_options            w,u,c,r ; need to look this up for all defs&lt;br /&gt;        notification_interval           600 ; retry sending notifs every 8 mins&lt;br /&gt;        notification_period             24x7 ; keep sending them&lt;br /&gt;        check_command                   check_nrpe!check_local_swap!80%!55% ; execute the event handler and warn like hell&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Lots of goodies as you can see. Let us look at the event handler&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;define command{&lt;br /&gt;        command_name    handle-swap&lt;br /&gt;        command_line    /home/scripts/handle_swap.pl&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;This means execute this script whenever any event for swap occurs (I decided to make this simple and not put a threshold on this).&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;What does handle_swap.pl do - well it’s a perl script that looks at free memory and if only a few 100K of swap is in use, swapoff -a; swapon -a;&lt;br /&gt;&lt;br /&gt;In this case, it is a bit safe to do this. Why do this? Why not just turn of swap. I have talked in depth about this subject-but for a minor recap. Linux needs swap else, kswapd will freak out. Swap in DB's is bad so I clean it up automatically since O_DIRECT on my SAN is not an option.&lt;br /&gt;&lt;br /&gt;Why not just run a cron job? 
Nagios keeps a log, I like to review what is happening from a central location, and nagios is freaking COOL.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2624506898194084838?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2624506898194084838/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2624506898194084838' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2624506898194084838'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2624506898194084838'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/09/nagios-event-handlers-love-them.html' title='Nagios Event Handlers - Love them'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8130630633072762098</id><published>2009-08-21T11:02:00.000-07:00</published><updated>2009-08-21T11:31:45.684-07:00</updated><title type='text'>Back From Vacation and Man do I feel great.</title><content type='html'>For the first time ever I took a two-week vacation. How can a person who has 100s of database servers, 100s of web servers, and a system that supports 100K tps across 20 TB of data is able to take a vacation? 
Easy, I have a great team that is very competent in managing the platform by following our cookbook routines and guidelines for new application interaction with the databases.&lt;br /&gt;&lt;br /&gt;Where did I go? I went to NYC, stayed in Tribeca and only ate Pizza, Hotdogs, White Castle and Halal Food from vendors that are on nearly every street corner. I also lost 6 pounds! After three months of half-ass dieting to prepare for my Pizza diet, who would have known that just going for it was the key to success! How did I do it? Well, first I walked to Little Italy, bought 2-4 slices, ate them, and then walked around the city. That is it. As a Database Dude, I do not do much walking except for when I am too tired to run on the treadmill for 20 mins.  I was averaging around 10 miles a day for a 12-hour period walking around NYC taking pictures, enjoying the sights, eating, drinking, and then repeating. I even walked from Canal Street all the way to the Staten Island Ferry just because. Man it was great. &lt;br /&gt;After NYC, I went to Puerto Rico for the second time. My family and I had a freaking awesome time. We went to the only US National Park that is a Rain Forest. Went to Vieques to the Bioluminescent Bay, where the single-cell organisms light up when kinetic energy is applied to them. Went to Culebra, to Flamenco Beach – which is off the hook: much better than any other beach I have been to. Drove around the entire Main Puerto Rican Island stopping off in Rincon and eating every 3-4 hours.&lt;br /&gt;I am fully rested and feel like a 20-year-old again. So, expect a lot more posts. 
I think the order will be:&lt;br /&gt;&lt;br /&gt;Migrate 20 TB of data into a new Format without downtime.&lt;br /&gt;How to make Friend Queries work with a database.&lt;br /&gt;Throughput increase from Asynchronous Queries.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8130630633072762098?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8130630633072762098/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8130630633072762098' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8130630633072762098'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8130630633072762098'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/08/back-from-vacation-and-man-do-i-feel.html' title='Back From Vacation and Man do I feel great.'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2248996121750136841</id><published>2009-06-29T13:29:00.000-07:00</published><updated>2009-06-29T13:30:55.725-07:00</updated><title type='text'>mysql UC 2009 Talk</title><content type='html'>Scribe is a bit buggy with displaying this presentation:&lt;br /&gt;&lt;br /&gt;&lt;a title="View Scaling a Widget Company on Scribd" href="http://www.scribd.com/doc/16934181/Scaling-a-Widget-Company" style="margin: 12px auto 6px auto; font-family: 
Helvetica,Arial,Sans-serif; font-style: normal; font-variant: normal; font-weight: normal; font-size: 14px; line-height: normal; font-size-adjust: none; font-stretch: normal; -x-system-font: none; display: block; text-decoration: underline;"&gt;Scaling a Widget Company&lt;/a&gt; &lt;object codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0" id="doc_643907886416336" name="doc_643907886416336" classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000" align="middle" height="500" width="100%" &gt;  &lt;param name="movie" value="http://d.scribd.com/ScribdViewer.swf?document_id=16934181&amp;access_key=key-20juy0gcpq7hvlqze92y&amp;page=1&amp;version=1&amp;viewMode="&gt;   &lt;param name="quality" value="high"&gt;   &lt;param name="play" value="true"&gt;  &lt;param name="loop" value="true"&gt;   &lt;param name="scale" value="showall"&gt;  &lt;param name="wmode" value="opaque"&gt;   &lt;param name="devicefont" value="false"&gt;  &lt;param name="bgcolor" value="#ffffff"&gt;   &lt;param name="menu" value="true"&gt;  &lt;param name="allowFullScreen" value="true"&gt;   &lt;param name="allowScriptAccess" value="always"&gt;   &lt;param name="salign" value=""&gt;        &lt;embed src="http://d.scribd.com/ScribdViewer.swf?document_id=16934181&amp;access_key=key-20juy0gcpq7hvlqze92y&amp;page=1&amp;version=1&amp;viewMode=" quality="high" pluginspage="http://www.macromedia.com/go/getflashplayer" play="true" loop="true" scale="showall" wmode="opaque" devicefont="false" bgcolor="#ffffff" name="doc_643907886416336_object" menu="true" allowfullscreen="true" allowscriptaccess="always" salign="" type="application/x-shockwave-flash" align="middle"  height="500" width="100%"&gt;&lt;/embed&gt; &lt;/object&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2248996121750136841?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' 
type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2248996121750136841/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2248996121750136841' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2248996121750136841'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2248996121750136841'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/06/mysql-uc-2009-talk.html' title='mysql UC 2009 Talk'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8184607162944041205</id><published>2009-06-11T16:06:00.000-07:00</published><updated>2009-06-11T16:42:12.439-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='apc'/><category scheme='http://www.blogger.com/atom/ns#' term='php'/><category scheme='http://www.blogger.com/atom/ns#' term='apache'/><category scheme='http://www.blogger.com/atom/ns#' term='connect code'/><title type='text'>PHP mysql connect and using APC to recover</title><content type='html'>So, when you connect on the fly to a database your subject to a variety of issues, like when the db is not available and when the db does not have a route.&lt;br /&gt;&lt;br /&gt;One of the main reasons why a dev may want to connect on the fly is because they have too many front ends to hold a persistent connection on the backends. Since mySQL does not use libevent, holding open threads to mySQL is much more costly. 
Threads == Memory.&lt;br /&gt;&lt;br /&gt;But that's neither here nor there. The main purpose of this post is to talk about how to recover from failed connections that block apache threads.&lt;br /&gt;&lt;br /&gt;Common Failures:&lt;br /&gt;No route to Host&lt;br /&gt;Flapping NIC&lt;br /&gt;Locked Tables&lt;br /&gt;Recovering from a Crash&lt;br /&gt;more of the same.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;My Environment:&lt;br /&gt;I have a bunch of webservers (200+) that all have 300 possible threads (60000 possible connections to a single DB) behind a load balancer that uses the LB least connections protocol to distribute load across all 200+ webservers.&lt;br /&gt;&lt;br /&gt;Since PHP is used in a stateless mode (no guarantee that the same user will hit the same server), I have to have some way of telling all other apache procs for said box that a server is down. I use stateless on the fly connections, so each apache proc will test the connection.&lt;br /&gt;&lt;br /&gt;Herein lies my most common problem. 
If a box in the backend dies, all apache threads will block for a predefined time-out.&lt;br /&gt;&lt;br /&gt;In /etc/php.ini (it could be in /etc/php.d/mysql.ini) I set &lt;br /&gt;&lt;br /&gt;mysql.connect_timeout = 5 // the default is 60&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In my common_db class when connecting to a database, the connect routine returns a database handle object&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;if (PEAR::isError($dbh) &amp;&amp; $delta_to_connect &gt;= 5){&lt;br /&gt;      &lt;br /&gt;      #&lt;br /&gt;      # mark ip as dead for 15 min&lt;br /&gt;      #&lt;br /&gt;      &lt;br /&gt;      apc_store($ip, array('DEAD'), 900);&lt;br /&gt;      return false;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;return $dbh;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now PRIOR to calling the database connect code I check to see if the IP is up.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;$status = apc_fetch($ip);&lt;br /&gt;if ($status[0] === 'DEAD'){&lt;br /&gt;   return false;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;... do connect ...&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;But, there could be a variety of issues that can call false positives, like network flaps, someone blocking the db for some time etc. 
So, I'll allow one request .1% of the time bypass the status check to try again.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;if ($status[0] === 'DEAD' &amp;&amp; (mt_rand(0, 1000) != 1)){&lt;br /&gt;    return false;&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;But, if your app can't connect to the DB aren't you down anyway?&lt;br /&gt;&lt;br /&gt;Yes, although I don't have to restart all the httpds.&lt;br /&gt;Also each DB has a redundant pair to when returning false, the app code will try the other set of servers.&lt;br /&gt;&lt;br /&gt;I have a variety of methods to deal with these scenarios but this is the quickest to implement.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8184607162944041205?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8184607162944041205/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8184607162944041205' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8184607162944041205'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8184607162944041205'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/06/php-mysql-connect-and-using-apc-to.html' title='PHP mysql connect and using APC to recover'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4765700555798824002</id><published>2009-05-19T12:26:00.000-07:00</published><updated>2009-05-19T12:56:34.369-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='filesort'/><category scheme='http://www.blogger.com/atom/ns#' term='order by'/><category scheme='http://www.blogger.com/atom/ns#' term='optimization'/><title type='text'>Multi Direction Sorts and avoiding a file sort</title><content type='html'>There are two PRIMARY directions to sort data in SQL: Ascending (ASC) and Descending DESC.&lt;br /&gt;When these two sort definitions are put together in a single statement a filesort is produced.&lt;br /&gt;&lt;br /&gt;Why do we want to avoid filesorts?&lt;br /&gt;&lt;br /&gt;Filesorts are bad. 1st they tickle a thread based buffer called  sort_buffer_size. Additionally filesorts reads the data twice, unless max_length_for_sort_data limit is reached and as a result the Filesort runs slower to reduce disk I/O. If you want filesorts to run faster at the expense of the disk increase the default max_length_for_sort_data. 
You can read the filesort algorithm &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/order-by-optimization.html"&gt;here.&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;So, here is an example&lt;br /&gt;&lt;pre&gt;&lt;br /&gt; CREATE TABLE `ABCD` (&lt;br /&gt;  `A` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  `B` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  `C` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  `D` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  PRIMARY KEY  (`a`,`b`,`c`,`d`)&lt;br /&gt;) ENGINE=InnoDB DEFAULT CHARSET=latin1&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;mysql&gt; explain SELECT * FROM ABCD WHERE a=1 AND b=1 ORDER BY c DESC, d ASC\G      &lt;br /&gt;&lt;br /&gt;*************************** 1. row ***************************&lt;br /&gt;           id: 1&lt;br /&gt;  select_type: SIMPLE&lt;br /&gt;        table: ABCD&lt;br /&gt;         type: ref&lt;br /&gt;possible_keys: PRIMARY&lt;br /&gt;          key: PRIMARY&lt;br /&gt;      key_len: 8&lt;br /&gt;          ref: const,const&lt;br /&gt;         rows: 2&lt;br /&gt;        &lt;span style="font-weight:bold;"&gt;Extra: Using where; Using index; Using filesort&lt;/span&gt;&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Notice the filesort? So how does one get around this filesort?&lt;br /&gt;&lt;br /&gt;Well&lt;br /&gt;&lt;br /&gt;Let's define some roles for columns C and D. C is the parent while D is the child.&lt;br /&gt;&lt;br /&gt;&lt;LI/&gt;We want all the latest parents (C)&lt;br /&gt;&lt;LI/&gt;We want all the oldest children (D)&lt;br /&gt;&lt;br /&gt;We require pagination of all the PARENTS (show 10 parents per page) so Queries like this is PRODUCED&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;SELECT * FROM ABCD WHERE A=? AND B=? ORDER BY C DESC&lt;br /&gt;explain SELECT * FROM ABCD WHERE a=1 AND b=1 ORDER BY c DESC LIMIT 10\G   &lt;br /&gt;*************************** 1. 
row ***************************&lt;br /&gt;           id: 1&lt;br /&gt;  select_type: SIMPLE&lt;br /&gt;        table: ABCD&lt;br /&gt;         type: ref&lt;br /&gt;possible_keys: PRIMARY&lt;br /&gt;          key: PRIMARY&lt;br /&gt;      key_len: 8&lt;br /&gt;          ref: const,const&lt;br /&gt;         rows: 2&lt;br /&gt;        &lt;span style="font-weight:bold;"&gt;Extra: Using where; Using index&lt;/span&gt;&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now&lt;br /&gt;&lt;br /&gt;FOREACH($C_parent as $i =&gt; $c_id) {&lt;br /&gt;&lt;br /&gt;$C_parent[$i] = SELECT SQL_CALC_FOUND_ROWS * FROM ABCD WHERE A=? AND B=? AND C=$c_id ORDER BY D ASC LIMIT 1;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;So, we changed 1 query into 11 queries (10 parents per page) to make the page load happen faster, by getting rid of the filesort. &lt;br /&gt;&lt;br /&gt;What 11 queries is faster then 1? Yes, for this case it is. The reason is because filesorts are SLOOOOW, they chew up a lot of limited resources and they should be avoided. 
I've see filesorts take close to 50-60% of the query time.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4765700555798824002?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4765700555798824002/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4765700555798824002' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4765700555798824002'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4765700555798824002'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/05/multi-direction-sorts-and-avoiding-file.html' title='Multi Direction Sorts and avoiding a file sort'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5951435469435700761</id><published>2009-04-23T12:23:00.001-07:00</published><updated>2009-04-23T13:12:43.728-07:00</updated><title type='text'>Restoring a backup of a Circular Replication config</title><content type='html'>So say you have two mySQL servers called A and B. 
A and B are in a circular replication ring, meaning:&lt;br /&gt;&lt;br /&gt;A replicates from B&lt;br /&gt;B replicates from A&lt;br /&gt;&lt;br /&gt;In addition, log-slave-updates is on, so if either A or B goes down you can recover all the data from the other side.&lt;br /&gt;&lt;br /&gt;Therefore, here are some steps.&lt;br /&gt;Take a snapshot of a server in the ring, with your favorite backup tool (mine is ibbackup)&lt;br /&gt;&lt;br /&gt;When either server A or B dies, restore from said backup. Here are the steps. Let us assume server A goes down, and backups are done from server B.&lt;br /&gt;&lt;br /&gt;restore the backup of server B to server A&lt;br /&gt;&lt;br /&gt;On server-A turn off log-slave-updates (you will see why on the next line)&lt;br /&gt;Add &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/replication-options-slave.html#option_mysqld_replicate-same-server-id"&gt;replicate-same-server-id&lt;/a&gt; on server A to my.cnf&lt;br /&gt;&lt;br /&gt;Run CHANGE MASTER TO with the position and file of the binary log that is reported from your tool on server-A.&lt;br /&gt;&lt;br /&gt;If the binary log did not get corrupted on server-B you're cool; otherwise flip the binary log to the next binary log from server-A on server-B.&lt;br /&gt;&lt;br /&gt;Let server-A catch up&lt;br /&gt;Stop server-A&lt;br /&gt;Add log-slave-updates on server-A&lt;br /&gt;remove replicate-same-server-id on server A&lt;br /&gt;Start server-A&lt;br /&gt;&lt;br /&gt;done.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5951435469435700761?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5951435469435700761/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5951435469435700761' title='4 Comments'/><link rel='edit' 
type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5951435469435700761'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5951435469435700761'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/04/restoring-backup-of-circular.html' title='Restoring a backup of a Circular Replication config'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-9156053447541752987</id><published>2009-04-08T13:11:00.001-07:00</published><updated>2009-04-08T14:13:33.384-07:00</updated><title type='text'>Recover from No-Route to Host or Storage Engine Plugin Crashes</title><content type='html'>There are a finite set of cases where mySQL clients will hang on a connection-for a small period of time (seconds) or indefinitely. Most notably no-route to host causes a timeout to occur which in most clients are on the order of seconds to recovery.&lt;br /&gt;&lt;br /&gt;In a web environment if a database is connected to on the fly, each connection made should take milliseconds-when the net is healthy. However, when a database server crashes a timeout for each connection takes seconds and there are cases where connections can just hang (recovery of INNODB as an example). For instance have you ever had a ssh session hang and a kill (SIGHUP..) does not work? This happens for mySQL client connections too. &lt;br /&gt;&lt;br /&gt;I want the front ends to recover gracefully from a no-route to host, or more notably a hanged connection condition. I want to avoid that damn timeout all together on stateless connections. Why? 
Because if you use up all your worker httpd threads requests Fail-things crash, bosses get mad, everyone wakes up and you have a meeting about it later. So what are ways around this?&lt;br /&gt;&lt;br /&gt;Use a Load balancer&lt;br /&gt;&lt;dd&gt;This is cool and all, but is expensive since two or three are needed for HA purposes. In addition, this increases the complexity of managing servers, and most LBs are optimized for HTTPD traffic, not raw TCP traffic.&lt;br /&gt;&lt;/dd&gt;&lt;br /&gt;Use a memcache layer to keep DB state&lt;br /&gt;&lt;br /&gt;&lt;dd&gt;This is cool as well, but the state server, which collects the state of your servers and updates memcache, is now a single point of failure-and a lot of time is spent to make this resilient. Not to mention all clients across Programming languages (bash scripts, Java, PHP, Python, etc) need to have access to this memcache layer and logically handle the connection the same. This is doable, but there is an easier way.&lt;br /&gt;&lt;/dd&gt;&lt;br /&gt;Use IRON DNS:&lt;br /&gt;&lt;dd&gt;IRON DNS is a term I use to build an HA Resolver. IF a box fails a health check, a nagios event handler can tell IRON DNS to set the domain names IP address to 127.0.0.1 or a routable interface that produces a Connection Refused (111 instead of 110).&lt;br /&gt;&lt;/dd&gt;&lt;br /&gt;&lt;br /&gt;For an internal network, all database entries should be able to fit in memory. All resolves should take less then 2ms, and updates to DNS entries can happen in just a few seconds across your entire farm. The only drawback is if the DNS server fails to respond your site is boned. Making DNS Failure proof is rather easy-which is another post entirely.&lt;br /&gt;&lt;br /&gt;For my solution, I am using DNS. This allows me to recover servers in a shard on different boxes without having to change code. 
This also allows my environment to recover from blocked I/O events that can spike load on the front-end that make requests slow.&lt;br /&gt;&lt;UL&gt;&lt;br /&gt;Here are some conditions where connections take too long:&lt;br /&gt;&lt;LI&gt;No Route to Host&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;INNODB recovery&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;Disk Fails (disk fills up)&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;Switch Fails / Flaps&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;Plug-in storage engine crashes, yet mySQL is up&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;DNS Resolve fails&lt;br /&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;If you have a better method, or want to advise me on flaws I should watch out for, drop a comment.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-9156053447541752987?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/9156053447541752987/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=9156053447541752987' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9156053447541752987'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9156053447541752987'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/04/recover-from-no-route-to-host-or.html' title='Recover from No-Route to Host or Storage Engine Plugin Crashes'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8974082837638323630</id><published>2009-03-31T11:05:00.000-07:00</published><updated>2009-03-31T14:26:49.502-07:00</updated><title type='text'>What do you think about adding ZLIB to memcache storage</title><content type='html'>Memcache is a fantastic Hash table-very fast and one of the great successes of Brad Fitzpatrick-who in my opinion has done more for the open social movement as an individual then anyone else. I use memcache quite extensively, now I am thinking about adding ZLIB native to compress the value of each key-much like how INNODB does with the Barracuda file format. The theory is with a CPU hit, we can store more data per memcache instance. I've talked to the Northscale guys and they love the idea. What do you think?&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Update: Well what do you know&lt;br /&gt;&lt;br /&gt;http://us3.php.net/manual/en/function.memcache-setcompressthreshold.php&lt;br /&gt;&lt;br /&gt;for PHP for instance compresses the data on the client side.&lt;br /&gt;&lt;br /&gt;There still might be some value compressing the data on the server-side, but now I'm not as motivated.&lt;br /&gt;&lt;br /&gt;What might be a good alternative is to compress keys into 8 byte longs in memcached automatically instead of the actual string that can be huge. To give some more detail,&lt;br /&gt;8 byte longs is a 64-bit int. 
A string can easily be converted into a big int by bit manipulation - and the address space is huge so key conflict is effectively removed.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8974082837638323630?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8974082837638323630/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8974082837638323630' title='9 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8974082837638323630'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8974082837638323630'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/03/what-do-you-think-about-adding-zlib-to.html' title='What do you think about adding ZLIB to memcache storage'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1155369617655540368</id><published>2009-03-23T10:38:00.000-07:00</published><updated>2009-03-23T13:22:29.357-07:00</updated><title type='text'>Cloud computing without spending money</title><content type='html'>Cloud computing is the big thing nowadays, whether you are an app developer using EC2 or the Google APP engine, or a new company trying to build your own cloud product. If you are hosting, or using a platform it costs money. I hate to spend money, especially money that is for my company. 
If I find an idle server, I use it to 100% utilization (prior to the saturation point).&lt;br /&gt;&lt;br /&gt;I needed to build a new application that periodically crawls a website to update various lists. Building a crawler is expensive, especially from scratch. First, you have to define the amount of lag that is allowed from the crawl copy and the real copy. Of course the Project Manager does not want any lag, all events must be caught and near real-time without overloading the source of the data-but I am not hating, it is a challenge. Next, what technology to use, what language to write the app in-what considerations are left to be defined? How does one crawl Gigs, Tera, and amount of data in a guarantee period? On top of that, how much additional hardware is this going to cost. In addition, to be a cloud it needs to have an API so app developers can set, and get consistent data in an expected period. This is a lot of freaking requirements.&lt;br /&gt;&lt;br /&gt;Therefore, to solve this issue, I know that mySQL will store the data, but getting the data is the hard part. This is what is going to cost money, lots of it. I looked around common architectures and found that nothing would do what I wanted to do in a cost effective manner. So, I designed my own using Seti @ Home as the basis for the design. &lt;br /&gt;&lt;br /&gt;Get to the point already Dathan:&lt;br /&gt;&lt;br /&gt;I have turned ever user who views my applications into a collector, using idle bandwidth without knowing who is collecting the data. My user base on spare cycles will fetch a feed of my choosing, and sends that data to my servers without any personal information. Instead of using an Amazon or Google service, I have turned my user base into a cloud to service their needs.&lt;br /&gt;&lt;br /&gt;It is rather awesome-I must say. I am able to service the needs of more than 60 million users at the cost of development time, and NO NEW HARDWARE. 
The cloud does not have to be a service provider-it can be the end user as long as the end user is not impacted by the requests. BTW the team that I manage is freaking awesome-they built my vision with trial and error and a hand waved spec.&lt;br /&gt;&lt;br /&gt;Currently the system scales as long as there are enough end users. If I lose all my users then well I am boned, but to support the feeds all I need is 100K nodes at the current rate. With 60 million end nodes, I am cool.&lt;br /&gt;&lt;br /&gt;Imagine if Google with Adsense used this install base to tell Google if the data has changed for an arbitrary web address. All it needs is a few people to hit the same url, inform Google that the web address in question has a different checksum, then at that point Google’s crawlers can go fetch it, instead of constantly crawling data that doesn't change. Google would be able to reduce overall server cost significantly, if it just knew what data has changed instead of guessing what data has changed.&lt;br /&gt;&lt;br /&gt;By next year's Velocity conference I hope to have a full disclosure on what technology my team used, how my team got around cross domain issues, and how to compute checksums to validate the data.&lt;br /&gt;&lt;br /&gt;PS - I designed this, with my team we made it much better and one person implemented it and owns the product from this point on.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1155369617655540368?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1155369617655540368/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1155369617655540368' title='9 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/31421954/posts/default/1155369617655540368'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1155369617655540368'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/03/cloud-computing-without-spending-money.html' title='Cloud computing without spending money'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-979008481722196172</id><published>2009-03-12T13:58:00.000-07:00</published><updated>2009-03-16T10:03:23.168-07:00</updated><title type='text'>Walking an INNODB table Fast</title><content type='html'>Walking a table means, traversing each row, commonly used in building queues, fixing data, or dumping a table. I've recently ran into a problem-caused by an assumption, where walking a table was taking way to long using the method&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;$pos = 0;&lt;br /&gt;do {&lt;br /&gt;&lt;br /&gt;    $result = SELECT col FROM TABLE LIMIT $pos, 1000;&lt;br /&gt;    $pos += 1000;&lt;br /&gt;} while ($result);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;The assumption was since INNODB uses a cluster index, this would traverse the table using the PRIMARY key. This is not the case, &lt;b&gt;its not a problem in INNODB&lt;/b&gt; but a bad assumption, that I fell victim to. A table scan to each $pos occurs producing a Big-O of N^2. So, when the query:&lt;br /&gt;&lt;br /&gt;SELECT col FROM TABLE LIMIT 1000000, 1000 is executed mySQL will scan all the rows up to row position 1001000 and for each subsequent iteration.&lt;br /&gt;&lt;br /&gt;This is SLOOOOW. 
IMHO since the table is sorted by the primary key, mySQL should optimize this case - but it does not and will not. So, to walk an INNODB table fast, and keep linear time or a Big-O of N an alternative is&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;$last_id = 0&lt;br /&gt;do {&lt;br /&gt;&lt;br /&gt;  $result = SELECT col FROM TABLE USE INDEX(PRIMARY) WHERE pkey_part &gt; $last_id LIMIT 1000&lt;br /&gt;  $last_id = $result[count($result) - 1]-&gt;pkey_part&lt;br /&gt;&lt;br /&gt;}while($result);&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;This dumps a table very fast, almost as fast as doing a count(*) on the PRIMARY KEY.&lt;br /&gt;&lt;br /&gt;Another method is to &lt;br /&gt;&lt;br /&gt;SELECT col INTO OUTFILE "/dir/file.ids" FROM TABLE;&lt;br /&gt;&lt;br /&gt;but the data is local to the database - thus the need for the application to grab data. Another drawback of this method is that the dump produces more disk IO than walking a table off of a key, slowing down access to this table.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In conclusion, even if the storage engine keeps the table order consistent like INNODB does, do not assume that LIMIT 100000, 1000 is equivalent to a file seek of position 100000, without telling the Optimizer to use an index.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-979008481722196172?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/979008481722196172/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=979008481722196172' title='12 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/979008481722196172'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/31421954/posts/default/979008481722196172'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/03/walking-innodb-table-fast.html' title='Walking an INNODB table Fast'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>12</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-982060107299210055</id><published>2009-02-09T15:32:00.000-08:00</published><updated>2009-02-09T15:42:38.253-08:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='shard'/><category scheme='http://www.blogger.com/atom/ns#' term='facebook'/><category scheme='http://www.blogger.com/atom/ns#' term='presentation'/><category scheme='http://www.blogger.com/atom/ns#' term='opensocial'/><category scheme='http://www.blogger.com/atom/ns#' term='myspace'/><title type='text'>Speaking at Mysql Conf 2009: Architecture and Technology, Cloud Computing, LAMP, Replication and Scale-Out</title><content type='html'>I'll be going into detail what is Sharding, how to Shard, pitfalls of Sharding, performance/throughput gains, shard roles, and performance scaling in general. I hope to make this the most comprehensive talk to date on the subject in 45 min.&lt;br /&gt;&lt;br /&gt;The topic is called &lt;a href="http://en.oreilly.com/mysql2009/public/schedule/detail/5337"&gt;Scaling a Widget Company&lt;/a&gt;. I'll detail how I setup the data layer for Rockyou. 
How many transactions per second Rockyou is at, what the infrastructure is comprised of, how 99.999% uptime is achieved and hopefully get into BCP which I probably will not have time to go over.&lt;br /&gt;&lt;br /&gt;If you want me to focus on specific aspects on the subject of shard'ing let me know and I will :).&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-982060107299210055?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/982060107299210055/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=982060107299210055' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/982060107299210055'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/982060107299210055'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/02/speaking-at-mysql-conf-2009.html' title='Speaking at Mysql Conf 2009: Architecture and Technology, Cloud Computing, LAMP, Replication and Scale-Out'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1497036549037669925</id><published>2009-02-03T13:56:00.000-08:00</published><updated>2009-09-16T11:39:26.230-07:00</updated><title type='text'>How to reduce load and response time in PHP in five minutes</title><content type='html'>Request time is proportional to server load. 
If the application response time is big so will be the server's load. To reduce server load, reduce the wait time in the application's response time to serve the request. Below are some steps that I took to remove 1.8 ms overhead on every request to my web server farm. &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Tools Needed:&lt;br /&gt;vi&lt;br /&gt;strace&lt;br /&gt;top&lt;br /&gt;&lt;br /&gt;Use vi to look at your include path in php.init.&lt;br /&gt;Next use top to find which apache process is consuming the most cpu resources.&lt;br /&gt;Use strace -p [TOP HTTPD PROCESS] -T (-T is for deltas).&lt;br /&gt;&lt;br /&gt;In my example the include path is &lt;br /&gt;/usr/share/pear:/usr/lib64/pear:.:/var/www/html/httdocs/&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;lstat("/usr", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000033&gt;&lt;br /&gt;lstat("/usr/share", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000034&gt;&lt;br /&gt;lstat("/usr/share/pear", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000034&gt;&lt;br /&gt;lstat("/usr/share/pear/ams", 0x7fbfff1690) = -1 ENOENT (No such file or directory) &lt;0.000033&gt;&lt;br /&gt;open("/usr/share/pear/ams/include/FreqCapInfo.php", O_RDONLY) = -1 ENOENT (No such file or directory) &lt;0.000036&gt;&lt;br /&gt;lstat("/usr", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000037&gt;&lt;br /&gt;lstat("/usr/lib64", {st_mode=S_IFDIR|0755, st_size=65536, ...}) = 0 &lt;0.000034&gt;&lt;br /&gt;lstat("/usr/lib64/php", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000034&gt;&lt;br /&gt;lstat("/usr/lib64/php/pear", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0 &lt;0.000038&gt;&lt;br /&gt;lstat("/usr/lib64/php/pear/ams", 0x7fbfff1690) = -1 ENOENT (No such file or directory) &lt;0.000037&gt;&lt;br /&gt;open("/usr/lib64/php/pear/ams/include/FreqCapInfo.php", O_RDONLY) = -1 ENOENT (No such file or directory) &lt;0.000038&gt;&lt;br /&gt;open("/var/www/html/ams/include/FreqCapInfo.php", O_RDONLY) = 24 
&lt;0.000043&gt;&lt;br /&gt;fstat(24, {st_mode=S_IFREG|0775, st_size=6707, ...}) = 0 &lt;0.000031&gt;&lt;br /&gt;stat("./ams/include/FreqCapInfo.php", 0x7fbfff4778) = -1 ENOENT (No such file or directory) &lt;0.000037&gt;&lt;br /&gt;stat("/usr/share/pear/ams/include/FreqCapInfo.php", 0x7fbfff4778) = -1 ENOENT (No such file or directory) &lt;0.000038&gt;&lt;br /&gt;stat("/usr/lib64/php/pear/ams/include/FreqCapInfo.php", 0x7fbfff4778) = -1 ENOENT (No such file or directory) &lt;0.000036&gt;&lt;br /&gt;stat("/var/www/html/ams/include/FreqCapInfo.php", {st_mode=S_IFREG|0775, st_size=6707, ...}) = 0 &lt;0.000040&gt;&lt;br /&gt;close(24)                               = 0 &lt;0.000033&gt;&lt;br /&gt;mlock(0x552b876be0, 24)                 = 0 &lt;0.000092&gt;&lt;br /&gt;mlock(0x552b8df910, 10624)              = 0 &lt;0.000038&gt;&lt;br /&gt;munlock(0x552b876be0, 24)               = 0 &lt;0.000036&gt;&lt;br /&gt;munlock(0x552b8df910, 10624)            = 0 &lt;0.000032&gt;&lt;br /&gt;mlock(0x552b876be0, 24)                 = 0 &lt;0.001707&gt;&lt;br /&gt;mlock(0x552b8df910, 10624)              = 0 &lt;0.000009&gt;&lt;br /&gt;munlock(0x552b876be0, 24)               = 0 &lt;0.000007&gt;&lt;br /&gt;munlock(0x552b8df910, 10624)            = 0 &lt;0.000007&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Looking at the strace, 15 unneeded system calls are made on every request, each request roughly takes 30 micro seconds, for a total of a few ms wasted on every request. False positives are adding overhead to ever requests since the include path is not optimized. A Bloom Filter in Shared Memory would be perfect for for this part of PHP-but that's besides the point.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So, change your include path&lt;br /&gt;.:/var/www/html/httdocs/:/usr/share/pear:/usr/lib64/pear&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;For my example I changed the include path to the above. 
In my environment we don't do many PEAR loads, so it makes sense to use our directories 1st.&lt;br /&gt;&lt;br /&gt;By doing this 15 erroneous system calls have been removed. Note: If you use the php feature __autoload make sure to protect your calls with file exist or you may be doing a require once on a file that is in a different directory which is a PHP fatal Error.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;UPDATED: Some more goodies to reduce load on PHP boxes:&lt;br /&gt;&lt;br /&gt;For PHP 5.2 there is a nice new feature that you can play with&lt;br /&gt;&lt;br /&gt;realpath_cache_size. This is a directive native to PHP by default it's set to 16K, this means that PHP expects there to be very few files, but big files. In most environments I have been in this is not the case. The case is there are a lot of files, a lot of directories thus 16K is not enough.&lt;br /&gt;&lt;br /&gt;I've tested a few values but 128K seems to be my sweet spot.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://us.php.net/manual/en/ini.core.php"&gt;realpath_cache_size&lt;/a&gt; = 128K&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1497036549037669925?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1497036549037669925/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1497036549037669925' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1497036549037669925'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1497036549037669925'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/02/how-to-reduce-load-and-response-time-in.html' title='How to reduce load and response time in PHP 
in five minutes'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4681450815565669987</id><published>2009-01-27T15:30:00.000-08:00</published><updated>2009-01-27T16:50:55.498-08:00</updated><title type='text'>Shard Types  &amp;&amp; innodb_io_capacity, innodb_write_io_threads, innodb_read_io_threads</title><content type='html'>Another project that I am doing introduces a new shard type. First, think of a Shard as RAID-10 on your database layer. The data is striped across N servers and mirrored for failure recovery.&lt;br /&gt;&lt;br /&gt;Shard Types are Shards that serve specific purposes. For instance, I have an archive shard. This shard keeps data the is rarely ever read on even cheaper hardware-with slower disks and the data is naively compressed using the innodb_file_format=Barracuda&lt;br /&gt;option. If and/or when a person requests data that is archived, my software layer detects this case and migrates the archive data from the super crappy RAID-5 servers to the less crappy RAID-10 servers with 32 GB of RAM, all within 5-10 seconds.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Another shard type is the sub-shard. I use this terminology to define a different way of federating data for a predefined global object_id. A predefined global object_id is what you are federating by, for instance userid. &lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;userid X maps to Shard 3&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;What if you had an application that is expected to take up 8TB of data? 
Sticking all of userid X's data on a single shard is very costly since current size of the cluster + 8TB means more servers are needed and is costly (more data, larger ranges, slower the avg query speed). So why not put it on another set of servers that have big slow'ish disks, i.e. the Sub-Shard.&lt;br /&gt;&lt;br /&gt;Now for userid X&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;userid X maps to Shard 3&lt;br /&gt;userid X maps to SubShard 1&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Since some shard profiles do not require 100% uptime, I can run builds of mySQL that are cutting edge.&lt;br /&gt;&lt;br /&gt;Using the Percona 5.1 build, I ran across new tweaks for XtraDB. The patch is from the Google Patch made by &lt;a href="http://mysqlha.blogspot.com/"&gt;Mark Callaghan&lt;/a&gt;'s Google Team. Mark's architecture is purely disk I/O bound, and from his tests, INNODB does not use I/O effectively. He added a bunch of code dealing with the I/O performance bottlenecks that innodb native has. The three new tweaks addressing the I/O bottleneck specifically are&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;innodb_io_capacity = 100 // If running without the patch this is the equivalent default behavior&lt;br /&gt;innodb_write_io_threads = 1&lt;br /&gt;innodb_read_io_threads = 1&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;From what I gather, this means that DEFAULT INNODB uses one write thread 1 read thread and will only work on 100 pending iops at a time-which is probably why INNODB takes sooo long to shutdown since by default 90% of the buffer pool contains dirty pages.&lt;br /&gt;&lt;br /&gt;If your servers have a large innodb_buffer_pool_size, on the order of a few Gigs and the server has many spindles (greater than 1) then increasing these params may help your application purge dirty pages faster. Be warned; do not increase this too high. If your box has a lot of pending I/O and this new code does not have the bandwidth to flush based on your settings, the box will freeze. 
That means you will have to remove traffic off of the server, let the pending io finish, lower the settings and restart.&lt;br /&gt;&lt;br /&gt;So, what is a good size to set this to?&lt;br /&gt;&lt;br /&gt;Percona says 100*Number of spindles you have on the db server. That seems a bit high for me. I would say 100*Number of spindles / 2 - to start out with, then over time ramp it up.&lt;br /&gt;&lt;br /&gt;Currently I am about to run 10 shards, or 20 servers, with the Percona build. This will be a purely I/O bound load.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4681450815565669987?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4681450815565669987/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4681450815565669987' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4681450815565669987'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4681450815565669987'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/01/shard-types-innodbiocapacity.html' title='Shard Types  &amp;&amp; innodb_io_capacity, innodb_write_io_threads, innodb_read_io_threads'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-9157338468258379483</id><published>2009-01-07T18:16:00.000-08:00</published><updated>2009-01-07T18:46:05.340-08:00</updated><title type='text'>Optimizer tricks</title><content type='html'>When you write good SQL, that use indexes properly there is one more obstacle that can slow down your app. The mySQL optimizer. From versions 3.23 to 5.1 the optimizer has been a problem for me. In mySQL 6.0 SUN/mySQL has resources improving it.&lt;br /&gt;&lt;br /&gt;I wrote a post detailing how to pick indexes to get the most out of mySQL &lt;a href="http://mysqldba.blogspot.com/2008/07/group-by-order-by-optimization-part-ii.html"&gt;here&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://mysqldba.blogspot.com/2007/09/mysql-optimizer-and-your-applications.html"&gt;Here&lt;/a&gt; is a post about the mySQL optimizer and what you can do to speed up your SQL SELECT statements.&lt;br /&gt;&lt;br /&gt;What I would like to share with you today, is that UPDATE and DELETE statements can also use optimizer tricks that SELECT uses. Its not documented on the mysql.com but it is possible to do something like&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;UPDATE [YOUR TABLE] &lt;span style="font-weight:bold;"&gt;USE INDEX(`your_index_name`)&lt;/span&gt; SET col='val' WHERE [columns that satisfy your index].&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;To see if you have problems with your UPDATE statements taking to much time I recommend 1st running INNODB as your storage engine since SHOW INNODB STATUS will indicate what index your long running UPDATE is using. 
This can be achieved by looking for /RECORD LOCKS space id.* index/ of SHOW INNODB STATUS.&lt;br /&gt;&lt;br /&gt;Another indicator that you might need to force the optimizer to do the right thing is to track "Deadlock detected" from concurrent update statements; look at the where clause of your UPDATE statement.&lt;br /&gt;&lt;br /&gt;Next solve your issues by telling mySQL to use the correct index when setting exclusive locks via the USE INDEX statement above.&lt;br /&gt;&lt;br /&gt;Hope this helps.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-9157338468258379483?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/9157338468258379483/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=9157338468258379483' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9157338468258379483'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9157338468258379483'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2009/01/optimizer-tricks.html' title='Optimizer tricks'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1280048385351871015</id><published>2008-12-16T18:07:00.000-08:00</published><updated>2008-12-16T18:40:25.005-08:00</updated><title type='text'>Logging to DB after PHP is done generating 
content</title><content type='html'>Oh how I love register_shutdown_function of PHP. This bad baby will execute at the end of the scripts in call order. So, how is this useful?&lt;br /&gt;&lt;br /&gt;Say you want to log some action, but that logging DB is on another server. Additionally you do not want to take into account that writing to the other server may break your transaction because of timeouts if logged in the middle of a transaction. Another use case: you do not want to change a bunch of code in different places to batch all logging routines up.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;public function __construct($platform){&lt;br /&gt;        parent::__construct($platform);&lt;br /&gt;        register_shutdown_function(array($this, 'afterProcess'));&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;public function afterProcess(){&lt;br /&gt;&lt;br /&gt;    foreach($this-&gt;getDataThatChange() as $key =&gt; $values){&lt;br /&gt;             $insert_data[] = $values;&lt;br /&gt;    }&lt;br /&gt;    &lt;br /&gt;    $this-&gt;getDBClass()-&gt;DB_logserver_insert_query("LoggingTable", $insert_data, 'delayed');&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;}&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;The function afterProcess will batch all the changes and do a bulk insert into a myISAM table so it can used the DELAYED functionality.&lt;br /&gt;&lt;br /&gt;This is done outside of all transactions, and at the end of the script as the data is returned.&lt;br /&gt;&lt;br /&gt;But, why not just use __deconstructor() in PHP? I want it to happen before the deconstructor is called.&lt;br /&gt;&lt;br /&gt;http://us3.php.net/manual/en/language.oop5.decon.php#76710&lt;br /&gt;&lt;br /&gt;I have been using register_shutdown_function for years now, especially to clean up connections at the end of script execution. 
Since someone asked me if this was possible in PHP, I decided to post this quick usage of a cool php method.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1280048385351871015?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1280048385351871015/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1280048385351871015' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1280048385351871015'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1280048385351871015'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/12/logging-to-db-after-php-is-done.html' title='Logging to DB after PHP is done generating content'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4882692834738667561</id><published>2008-10-29T10:22:00.000-07:00</published><updated>2008-10-29T11:10:59.816-07:00</updated><title type='text'>INNODB Shared Locks, Exclusive Locks and INSERT INTO SELECT FROM</title><content type='html'>I wrote an application that is able to send out 3-8 million messages an hour with only 10 CPU's. 
This application is a part of an Offline Task system that scales linearly.&lt;br /&gt;&lt;br /&gt;How is this done, I'll go into detail hopefully at the mySQL conference if they accept my proposal.&lt;br /&gt;&lt;br /&gt;The scope of this blog post is to go over building the "Task Queues". Currently I have 13 Queues, one queue for each Shard that I run. The data is federated by user or randomly federated with a GUID that lives as long as the job. A request came in to add 20 million jobs to the queue all at once. The problem is with this list, will I cause deadlocks in innodb as I add the jobs to the queue as one transaction? Can live traffic still write to this queue?&lt;br /&gt;&lt;br /&gt;To verify that Deadlocks will not occur - having an understanding about how locks work in INNODB is key. I suggest reading &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/innodb-locks-set.html"&gt;this page&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;To build the queue I dumped the data source by &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;SELECT identifier, 16 INTO OUTFILE "/data/mysql/BuildQueue.log" FROM SOURCE_TABLE WHERE CONDITION.&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;The isolation level is REPEATABLE-READ; I'm setting a shared lock to get the most current version of the data. 
Writes are not blocked.&lt;br /&gt;&lt;br /&gt;Next:&lt;br /&gt;&lt;br /&gt;I create a table on each shard where the queue is located.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;CREATE TABLE IF NOT EXISTS OfflineTasksHold (&lt;br /&gt;    object_id bigint(20) NOT NULL DEFAULT 0,&lt;br /&gt;    object_type smallint unsigned NOT NULL DEFAULT 0,&lt;br /&gt;    PRIMARY KEY(object_id,object_type)&lt;br /&gt;) ENGINE=INNODB;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Then I issue a command on each Shard&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;LOAD DATA INFILE "/data/mysql/BuildQueue.log" INTO TABLE OfflineTasksHold;&lt;br /&gt;START TRANSACTION;&lt;br /&gt;INSERT INTO OfflineTasks (object_id, object_type) SELECT * FROM OfflineTasksHold;&lt;br /&gt;COMMIT;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Each shard is getting around 4-5 million rows, while accepting real-time traffic of 20-60 tasks a second to the OfflineTasks table. The OfflineTasksHold table does not have any real-time requirements and is solely used to keep the queue in Primary Key order, plus there is a CHANCE that LOAD DATA could set an exclusive lock on the OfflineTasks table-shutting down adding data to the table by the live site. The INSERT sets an exclusive lock on the rows that are being added, so the Offline Task Sheppard - the process that pops tasks off the queue is blocked for a small period - which is acceptable. Why are they blocked? Well, the massive insert sets an Exclusive Lock. The Sheppard is trying to grab the rows that are locked waiting up to 50 seconds, until innodb_lock_wait_timeout is reached. This condition is acceptable. All other inserts are able to go into the queue without a slow down.&lt;br /&gt;&lt;br /&gt;In summary, the job queue is built and can be automated with confidence knowing that death to the various apps will not occur. 
Processing slows down for a bit but speed right back up.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4882692834738667561?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4882692834738667561/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4882692834738667561' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4882692834738667561'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4882692834738667561'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/10/innodb-shared-locks-exclusive-locks-and.html' title='INNODB Shared Locks, Exclusive Locks and INSERT INTO SELECT FROM'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3852890362828755881</id><published>2008-10-20T10:54:00.000-07:00</published><updated>2008-10-21T14:21:20.483-07:00</updated><title type='text'>mySQL Disk Benchmarks with Sysbench. When will mySQL / INNODB stop scaling?</title><content type='html'>Today's "&lt;span style="font-style:italic;"&gt;&lt;a href="http://en.wikipedia.org/wiki/Hard_disk_drive"&gt;commercial quality disks&lt;/a&gt;&lt;/span&gt;" are amazing but they follow the same limitations as yesterday's disks. mySQL scales very well, but disks do not. 
So if you're IO bound when will your expectation of speed fail?&lt;br /&gt;&lt;br /&gt;Test setup:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight:bold;"&gt;DELL 2950 PERC-6 HWRaid BBC 6 DISK 15K RPM 3.5" RAID-10 256K stripe across two channels-using WRITE THROUGH CACHE on mkfs.ext3 -T largefile4 Linux Filesystem.&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The theory is that the outer part of the spindles is the fastest, and the inner portion is slower - since the outer is where the data starts (thanks for the info &lt;a href="http://benjamin-schweizer.de/files/iotest/"&gt;Benjamin Schweizer&lt;/a&gt;). Thus one can conclude that the more disk space your application(s) use the slower the throughput, since the heads have to move more. Brad F. my co-worker did a benchmark to prove this. Our goal is to find out at what is the saturation point if our expectation is to have 22 MB / sec of random access.&lt;br /&gt;&lt;br /&gt;Why do we want 22MB / sec of random access throughput? We want to guarantee a certain level of performance when adding new apps to a common backend-which is I/O bound: we need to know when things will break. &lt;br /&gt;&lt;br /&gt;Here is what Brad found: Total disk size for our RAID-10 setup =~ 800G. 
What point does it FAIL to achieve our expectations of sustained 22MB/s?&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;rndrw test across 100G test / 750G LV =~ 35 MB/s # outer part of the spindles&lt;br /&gt;rndrw test across 100G test / 300G LV =~ 32 MB/s # outer part of the spindles&lt;br /&gt;rndrw test across 250G test / 300G LV =~ 24 MB/s # sweet spot&lt;br /&gt;rndrw test across 350G test / 384G LV =~ 21 MB/s # saturation point&lt;br /&gt;rndrw test across 750G test / 800G LV =~ 14 MB/s # waste of space&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;In conclusion these test show that even though a RAID-10 setup with 800G of space is available, the expected performance drops when data exceeds the sweet spot of 250G-300G of 800G usable-data array.&lt;br /&gt;&lt;br /&gt;Disclaimers: There are many factors that can raise or lower the bar, like different file systems, different I/O schedulers, flushing. For my setups I like&lt;br /&gt;&lt;br /&gt;Deadline I/O scheduler&lt;br /&gt;256K Stripe&lt;br /&gt;few inodes (don't need them)&lt;br /&gt;ext3 since that’s what stable and available.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3852890362828755881?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3852890362828755881/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3852890362828755881' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3852890362828755881'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3852890362828755881'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/10/mysql-disk-benchmarks-with-sysbench.html' title='mySQL Disk Benchmarks 
with Sysbench. When will mySQL / INNODB stop scaling?'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-9102950540154981769</id><published>2008-09-19T10:44:00.001-07:00</published><updated>2008-09-19T12:11:40.745-07:00</updated><title type='text'>How do you know when you need more memcache servers?</title><content type='html'>Let me first start off with the disclaimer, that I do not use memcache to scale, I use it to reduce latency. I'm of the firm belief that the database layer should be able to handle the requests, while memcache is used to keep frequent requests returning in a consistent time frame i.e. reduce I/O spikes.&lt;br /&gt;&lt;br /&gt;Capacity planning is key to making sure your site can serve the requests to users. If the site is slow, or down that is lost revenue in any revenue model used to monetize your product.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;How to determine when you need to add more memcache servers.&lt;br /&gt;&lt;br /&gt;The stats I look at are system stats and memcache stats.&lt;br /&gt;&lt;br /&gt;Memcache is Memory / network heavy. 
CPU spikes are very low, and if the CPU starts maxing out that is probably due to some sort of network driver issue or huge context switching or large values stored in memcache.&lt;br /&gt;&lt;br /&gt;So on the system side I look at vmstat&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;[root@memcached1 ~]# vmstat 5&lt;br /&gt;procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----&lt;br /&gt; r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa&lt;br /&gt; 0  0    208 207400  41452  50552    0    0    10   198    0     0  2  9 87  2&lt;br /&gt; 0  1    208 207408  41452  50552    0    0     0    81 15617 17671  3 13 84  0&lt;br /&gt; 0  0    208 207352  41452  50552    0    0     0    56 15508 17514  3 13 84  0&lt;br /&gt; 1  0    208 207248  41452  50620    0    0     0   310 15295 16762  3 12 84  0&lt;br /&gt; 0  0    208 207248  41452  50620    0    0     0    31 15512 17167  2 13 84  0&lt;br /&gt; 0  0    208 207256  41452  50620    0    0     0     3 15925 18214  3 14 84  0&lt;br /&gt; 0  0    208 207264  41452  50620    0    0     0     0 15456 16923  3 13 85  0&lt;br /&gt; 0  0    208 207264  41452  50620    0    0     0   213 15782 17604  3 13 84  0&lt;br /&gt; 0  0    208 207264  41452  50620    0    0     0    40 15860 18036  2 13 84  0&lt;br /&gt; 2  0    208 207272  41452  50620    0    0     0   214 15926 18248  3 14 84  0&lt;br /&gt; 0  0    208 207288  41452  50620    0    0     0    77 15781 17617  3 13 84  0&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;This server dedicated to memcache. The context switching is huge due to all of the constant requests-but we are talking about modern day CPU's which can context switch like crazy. The thing that is bugging me is that requests are starting to go into the run queue, not at a alarming rate but still this is an indication of some possible issue.&lt;br /&gt;&lt;br /&gt;This is something that is graphed on ganglia. 
If the run queue on average starts increasing, there is some problem.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Next stats from memcache.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;      /**&lt;br /&gt;        * @desc get extended status from all servers&lt;br /&gt;        */&lt;br /&gt;        public function CacheGetStats(){&lt;br /&gt;&lt;br /&gt;            if ($GLOBALS[cfg][disable_feature_memcache]){&lt;br /&gt;&lt;br /&gt;                return true;&lt;br /&gt;&lt;br /&gt;            }&lt;br /&gt;&lt;br /&gt;            return $this-&gt;memcache_obj-&gt;getExtendedStats();&lt;br /&gt;       }&lt;br /&gt;        &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;I have a class called Cache which is a wrapper around memcache class calls. Cal Henderson would kill me if I was using classes at Flickr. Don't get me wrong I agree with Cal 100% but the environment I am in now requires classes-so I have to use it. The reason why we don't like classes is for another post.&lt;br /&gt;&lt;br /&gt;So the output.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;            [pid] =&gt; 17696&lt;br /&gt;            [uptime] =&gt; 2748911&lt;br /&gt;            [time] =&gt; 1221850214&lt;br /&gt;            [version] =&gt; 1.2.2&lt;br /&gt;            [pointer_size] =&gt; 64&lt;br /&gt;            [rusage_user] =&gt; 135944.231335&lt;br /&gt;            [rusage_system] =&gt; 420733.419798&lt;br /&gt;            [curr_items] =&gt; 6012187&lt;br /&gt;            [total_items] =&gt; 2362145406&lt;br /&gt;            [bytes] =&gt; 4737438938&lt;br /&gt;            [curr_connections] =&gt; 654&lt;br /&gt;            [total_connections] =&gt; 4128179078&lt;br /&gt;            [connection_structures] =&gt; 7293&lt;br /&gt;            [cmd_get] =&gt; 12681552588&lt;br /&gt;            [cmd_set] =&gt; 2362145408&lt;br /&gt;            [get_hits] =&gt; 9880855733&lt;br /&gt;            [get_misses] =&gt; 2800696855&lt;br /&gt;            [evictions] =&gt; 0&lt;br /&gt;            [bytes_read] =&gt; 
2564412782739&lt;br /&gt;            [bytes_written] =&gt; 12893067371405&lt;br /&gt;            [limit_maxbytes] =&gt; 5242880000&lt;br /&gt;            [threads] =&gt; 4&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Notice on this server we have a good hit rate and no evictions. Yet looking at one server is not good enough, look at them all- the reason more memcache servers means more memory to store data for your application. The CRC32 hash that the PHP memcache client uses is not very even and some keys may be requested more.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt;            [pid] =&gt; 13956&lt;br /&gt;            [uptime] =&gt; 4228079&lt;br /&gt;            [time] =&gt; 1221850213&lt;br /&gt;            [version] =&gt; 1.2.2&lt;br /&gt;            [pointer_size] =&gt; 64&lt;br /&gt;            [rusage_user] =&gt; 268369.193681&lt;br /&gt;            [rusage_system] =&gt; 711491.537845&lt;br /&gt;            [curr_items] =&gt; 5219411&lt;br /&gt;            [total_items] =&gt; 3686853272&lt;br /&gt;            [bytes] =&gt; 4751658935&lt;br /&gt;            [curr_connections] =&gt; 675&lt;br /&gt;            [total_connections] =&gt; 4154000955&lt;br /&gt;            [connection_structures] =&gt; 9981&lt;br /&gt;            [cmd_get] =&gt; 19489963453&lt;br /&gt;            [cmd_set] =&gt; 3686853275&lt;br /&gt;            [get_hits] =&gt; 15062084538&lt;br /&gt;            [get_misses] =&gt; 4427878915&lt;br /&gt;            [evictions] =&gt; 11210410&lt;br /&gt;            [bytes_read] =&gt; 3908139025173&lt;br /&gt;            [bytes_written] =&gt; 10744393525089&lt;br /&gt;            [limit_maxbytes] =&gt; 5242880000&lt;br /&gt;            [threads] =&gt; 4&lt;br /&gt; &lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Take a look at this server. The evictions are high, indicating that memcache needs to make room for new objects. 
This is not good, it's an indication that the LRU is evicting objects out faster than their expire time. Additionally the memcache gets are much greater than the hits. This is an indication that memcache is not really working as well as it can.&lt;br /&gt;&lt;br /&gt;But one server is not an indication that there is a problem. Look at the system as a whole to determine if a problem exists. My rule of thumb is if 30-40% of the servers have a high eviction rate, it's time to add 30-40% more servers or memory.&lt;br /&gt;&lt;br /&gt;Now a lot of this can be tuned by changing the slab size, but learning from John Allspaw, don't make a plan based on a possible gain, make a plan based on the current usage. Then if the possible gain works you're golden.&lt;br /&gt;&lt;br /&gt;How do you base your stats on adding more memcache servers?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-9102950540154981769?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/9102950540154981769/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=9102950540154981769' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9102950540154981769'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/9102950540154981769'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/09/how-do-you-know-when-you-need-more.html' title='How do you know when you need more memcache servers?'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7946077061740653012</id><published>2008-09-19T10:44:00.000-07:00</published><updated>2008-09-19T11:08:19.894-07:00</updated><title type='text'>DB Designer -&gt; MySQL workbench</title><content type='html'>I had a lot of good experience using fabforce's DB designer. Now that I upgraded to mySQL workbench, I have nothing but good things to say.&lt;br /&gt;&lt;br /&gt;In 5 mins I was up and running after install. I took my mysqldump of my schema&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;mysqldump -d --no-add-drop-tables &amp;lt;DB_NAME&amp;gt;&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Clicked File-&gt;Import-&gt;Reverse Engineer MySQL Script&lt;br /&gt;&lt;br /&gt;Tada, a picture of my schema.&lt;br /&gt;&lt;br /&gt;Pictures are always nice to represent what you're conveying to individuals who may not be as experienced with the db structure as yourself. 
Just for this feature alone I recommend using Work Bench.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7946077061740653012?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7946077061740653012/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7946077061740653012' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7946077061740653012'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7946077061740653012'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/09/db-designer-mysql-workbench.html' title='DB Designer -&gt; MySQL workbench'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5397809168041610440</id><published>2008-08-20T15:38:00.000-07:00</published><updated>2008-08-20T16:09:30.979-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='INNODB plugin'/><title type='text'>Can't initialize function 'INNODB'; Plugin initialization function failed</title><content type='html'>So running mysql-5.1.24rc with INNODB-Plugin-1a I ran into "Can't initialize function 'INNODB'; Plugin initialization function failed", and noticed a more serious issue with the whole plugin architecture.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Before without plugin if the engine died so did mysqld. 
Now, mysqld can stay up and  accept connections. But what if you have 1000 mysql instances, and all of your Nagios checks where to test if mysqld was up and accepting connections? This entire test is now invalid. I now need a Nagios check to test if the plugin-engine is up, by doing a write to the engine.&lt;br /&gt;&lt;br /&gt;Now to the cause of the issue:&lt;br /&gt;&lt;br /&gt;Here is my condition. 5.1.24rc with INNODB-Plugin was killed by OOM killer (mysqld memory leak?). When it came back, my expectation was for innodb to go through a recovery process, it didn't. Strange. Then I got this message from replication.&lt;br /&gt;&lt;br /&gt;Slave: Unknown table engine 'InnoDB' Error_code: 1286&lt;br /&gt;&lt;br /&gt;So I did a show plugin command and innodb showed that it was installed. Since I'm out of ideas I thought that I would unload the various INNODB plugins.&lt;br /&gt;&lt;br /&gt;Tada found the problem, I could not uninstall them something is wrong with the plugin. So, to make everything smooth for a reinstall of the plugin.&lt;br /&gt;&lt;br /&gt;1. TRUNCATE mysql.plugins // I don't know if this is needed&lt;br /&gt;2. restart mysql&lt;br /&gt;3. INSTALL PLUGIN INNODB SONAME 'ha_innodb.so';&lt;br /&gt;&lt;br /&gt;Now I have an error&lt;br /&gt;InnoDB: mmap(26600275968 bytes) failed; errno 12&lt;br /&gt;InnoDB: Fatal error: cannot allocate the memory for the buffer pool&lt;br /&gt;&lt;br /&gt;Weird&lt;br /&gt;&lt;br /&gt;I look at whats allocated. mysqld is consuming 11GB when nothing is loaded, and the various key buffers are sized very low. So, just to test a theory I lower the innodb bufferpool from 26GB to 19GB. Since 19+11 = 30GB and I have 32GB total.&lt;br /&gt;&lt;br /&gt;1. start mysql&lt;br /&gt;2. INSTALL PLUGIN INNODB SONAME 'ha_innodb.so';&lt;br /&gt;&lt;br /&gt;Now it hangs. 
But, now this looks a familiar failure, I think its now recovering, so I look in the mysql error log and indeed it is recovering!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5397809168041610440?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5397809168041610440/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5397809168041610440' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5397809168041610440'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5397809168041610440'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/08/cant-initialize-function-innodb-plugin.html' title='Can&apos;t initialize function &apos;INNODB&apos;; Plugin initialization function failed'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8879959852785185526</id><published>2008-08-13T16:05:00.001-07:00</published><updated>2008-08-13T16:34:37.296-07:00</updated><title type='text'>INNODB Plugin, mysql 5.1 innodb_file_format=Barracuda</title><content type='html'>I put this bad boy into production, and its working great.&lt;br /&gt;&lt;br /&gt;So, I had an app that was sticking data into S3 when the data was marked or qualified as warehouse data. 
The problem with this method was when retrieving the data from S3 it was not chunked but appended to. So pulling 100M over the wire, parsing, sorting and adding to the db would fail since the process consumed more than its allowed share of resources. Plus, grabbing data from S3 has a very high response time; plainly put, it's slow.&lt;br /&gt;&lt;br /&gt;So, since the warehouse data is prone to error-the main reason why I needed a new solution, I decided to use ZLIB compression that is native to Innodb's Barracuda build.&lt;br /&gt;&lt;br /&gt;To &lt;a href="http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html"&gt;install innodb plugin&lt;/a&gt; here is a good write-up; I will not duplicate it.&lt;br /&gt;&lt;br /&gt;Configuring the plugin you must set in your my.cnf&lt;br /&gt; &lt;br /&gt;&lt;blockquote&gt;skip-innodb&lt;br /&gt;innodb_file_format=Barracuda&lt;br /&gt;innodb_file_per_table=1&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;LOAD the INNODB plugin via a few commands and tada, you're done.&lt;br /&gt;&lt;br /&gt;Next define your table with&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;I use utf8 for everything, KEY_BLOCK_SIZE will define how much extra overhead the CPU will have to work, think of this as a compression level. I use 4K since some rough benchmarks I've done show that this is the sweet spot for my INSERT rate + Select rate.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Finally I stick the entire dataset on a slow RAID-5 array. I use RAID-5 because when I want the data-I want to get it fast and put it into the production dataset real-time. I need the disk space and redundancy. The writes are not extraordinary and not very expensive from an I/O perspective. I'm getting 60% reduction in the INNODB table space. On top of that the format of the data is the same format as the production system. 
So I can pull this data fast write to the shard in a single httpd process. I can migrate archive data to production quality in less then a second for every 3000 rows. I qualify this as a win. We'll see how stable this is over the next few weeks.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So in conclusion, I've replaced S3 with a cheaper solution, faster solution, I put archived user data on cheap servers, and as a result the price per user goes down, since my capacity plan is based off a function of disk growth-on my fast disk cluster. Next I can store my backups on S3 instead of semi-live data :)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8879959852785185526?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8879959852785185526/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8879959852785185526' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8879959852785185526'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8879959852785185526'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/08/innodb-plugin-mysql-51.html' title='INNODB Plugin, mysql 5.1 innodb_file_format=Barracuda'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3908955247879115993</id><published>2008-08-07T12:13:00.000-07:00</published><updated>2008-08-15T17:19:09.804-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='scaling'/><category scheme='http://www.blogger.com/atom/ns#' term='monitor'/><category scheme='http://www.blogger.com/atom/ns#' term='shard'/><category scheme='http://www.blogger.com/atom/ns#' term='ganglia'/><category scheme='http://www.blogger.com/atom/ns#' term='capacity'/><title type='text'>Capacity Planning, Architecture, Scaling, Response time, Throughput</title><content type='html'>First of all let me start off saying that I learned a lot of Capacity Planning from two people. Jozo Dujmovic, and &lt;a href="http://kitchensoap.com/"&gt;John Allspaw&lt;/a&gt;-who by the way is coming out with a book.&lt;a href="http://www.slideshare.net/jallspaw/velocity2008-capacity-management1-484676"&gt;&lt;br /&gt;&lt;br /&gt;Capacity != Performance&lt;/a&gt;. You may have the capacity to do a bubble sort but a bubble sort is still a bubble sort.&lt;br /&gt;&lt;br /&gt;Really to Scale you need to know when your application will break. I have a tool set to help determine what application is producing what SQL and use that to figure out which SQL is producing the most load on the system. Some common tricks I do is put the execution path automatically as a SQL comment, then sample the FULL Processlist to build a graph on what application, function, SQL pattern is the top load. &lt;br /&gt;&lt;br /&gt;On top of that I use Ganglia to trend the use of each mysql box. 
Key metrics that I use to determine capacity.&lt;br /&gt;&lt;br /&gt;From iostat -x&lt;br /&gt;&lt;br /&gt;I/O wait&lt;br /&gt;atime&lt;br /&gt;svctm&lt;br /&gt;&lt;br /&gt;If the service average is trending towards 20% I/O wait I know that that is a hard-limit for my server configuration that will cause slave lag.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://jcole.us/blog/archives/2007/05/08/on-iostat-disk-latency-iohist-onward/"&gt;Jeremy Cole&lt;/a&gt; has a good write up and a tool for getting I/O stats that iostat itself does not expose.&lt;br /&gt;&lt;br /&gt;If the atime (Response time) is growing, I know that the overall SAN LUNS are saturated. On top of that SAN LUNS typically have larger svctm cutting overall throughput with how MYSQL works. On a side NOTE I despise using SANs for mySQL. Why well that's another post.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Then I have ganglia configured to monitor everything for SHOW GLOBAL STATUS, but really I only look at the following&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;Com_delete &lt;br /&gt;Com_insert &lt;br /&gt;Com_replace &lt;br /&gt;Com_select &lt;br /&gt;Com_update &lt;br /&gt;Key_reads &lt;br /&gt;Questions &lt;br /&gt;Connections &lt;br /&gt;Threads_created &lt;br /&gt;Slow_queries &lt;br /&gt;Handler_read_next &lt;br /&gt;Handler_read_rnd &lt;br /&gt;Handler_read_rnd_next&lt;br /&gt;Handler_rollback &lt;br /&gt;Innodb_buffer_pool_read_requests &lt;br /&gt;Innodb_buffer_pool_reads &lt;br /&gt;Innodb_pages_created &lt;br /&gt;Innodb_data_pending_reads &lt;br /&gt;Innodb_buffer_pool_read_ahead_rnd &lt;br /&gt;Innodb_data_read &lt;br /&gt;Innodb_data_written&lt;br /&gt;Innodb_row_lock_time_avg&lt;br /&gt;Innodb_row_lock_time &lt;br /&gt;Table_locks_immediate &lt;br /&gt;Table_locks_waited &lt;br /&gt;Sort_merge_passes  &lt;br /&gt;Sort_range         &lt;br /&gt;Sort_rows      &lt;br /&gt;Sort_scan      &lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Next I take the techniques I learned from John Allspaw and 
build a 3rd order Polynomial and verify that my &lt;a href="http://en.wikipedia.org/wiki/Coefficient_of_determination" alt="thnx allspaw"&gt;R^2&lt;/a&gt; is in the 98%tile to see when I need to add more servers. So far so good. Now I have a rough idea when I need to add more servers-a capacity plan. (The techniques involved are various ratios of Users per Application per Shard, busy time, more junk like that)&lt;br /&gt;&lt;br /&gt;Now your Architecture allows you to Scale, by ensuring a High Throughput at a low Response Time.&lt;br /&gt;&lt;br /&gt;I personally use a architecture that I've started on since 1999-Shard'ing. Brad Fitzpatrick when building Live-Journal really made this concept popular.&lt;br /&gt;&lt;br /&gt;With my Federation strategy I've been able to scale some of the most toughest dynamic applications linearly by just arbitrary adding more servers. It takes 5 min. to deploy new DB servers.&lt;br /&gt;&lt;br /&gt;So, in summary to capacity plan you need to know how the system works, monitor it and trend it. To scale: your database architecture needs to meet the needs of the app. Are you read or write heavy or both? Do you have a lot of concurrency? Does your app do a lot of sorts? Does your app do a lot of ranges? Is it all of the above? 
Design to meet the needs, benchmark, know when it will break, and have a plan to recover before it does.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3908955247879115993?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3908955247879115993/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3908955247879115993' title='7 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3908955247879115993'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3908955247879115993'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/08/capacity-planning-archetecture-scaling.html' title='Capacity Planning, Architecture, Scaling, Response time, Throughput'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>7</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8191301827665896697</id><published>2008-07-30T17:12:00.001-07:00</published><updated>2008-07-30T17:44:44.901-07:00</updated><title type='text'>What should I use MYISAM or INNODB?</title><content type='html'>I get this question allot, and my response is "it depends", people rarely like this general response. 
To pick a storage engine, the user must understand the gains and losses of using the storage engine, basically understand what INNODB and MYISAM do, and how they behave under different load patterns.&lt;br /&gt;&lt;br /&gt;When picking a storage engine I go through a checklist-below is a quick list to get an idea of the thought process.&lt;br /&gt;&lt;br /&gt;Do you require transactions?&lt;br /&gt;&lt;blockquote style="clear"&gt;If yes use INNODB else you may still want to use INNODB?&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Are you doing a lot of big queries that Scan 20-30% of the rows?&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;If yes use MYISAM. It's better at doing large queries where the query requires a full table lock. INNODB will lock each row as it scans through it which hurts query throughput.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Are you building an app to store 1 row and access said row really fast and at a high concurrency?&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;See &lt;a href="http://mysqldba.blogspot.com/2007/03/so-why-use-mysql-50.html"&gt;Tickets explanation&lt;/a&gt; of the example of an application that does this. Innodb hits a lock bottleneck when operating on the same row from many different threads, so it may not be advisable to use INNODB.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Are you building an app that stores a lot of blob data?&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;Now this is a grey area. With INNODB native zlib compression in 5.1+ INNODB may be a good choice over myISAM, while myISAM historically uses less disk space than its INNODB counterpart. So, don't be afraid to create a single INNODB table and 100s of MYISAM tables that hold the blob data for INNODB.&lt;br /&gt;&lt;br /&gt;&lt;/blockquote&gt;Why do this? 
Because a single table in MYISAM will likely lock slowing down blob insertion / removal.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Do you require a lot of reads and writes where the ratio is not 90% reads 10% writes but more like 60/40?&lt;br /&gt;&lt;br /&gt;Use INNODB over myISAM. If you don't believe me take a look at &lt;br /&gt;&lt;pre&gt;&lt;br /&gt;show global status like 'table_locks_waited';   &lt;br /&gt;+--------------------+-------+&lt;br /&gt;| Variable_name      | Value |&lt;br /&gt;+--------------------+-------+&lt;br /&gt;| Table_locks_waited | 82721 |&lt;br /&gt;+--------------------+-------+&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;These are generic common questions that personally help me find out what's best to use. In reality to pick the correct storage engine, you should experiment and find out what's best for your app. Understand the storage engine-do some testing, then it's easy to pick which is best.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8191301827665896697?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8191301827665896697/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8191301827665896697' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8191301827665896697'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8191301827665896697'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/07/what-should-i-use-myisam-or-innodb.html' title='What should I use MYISAM or INNODB?'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3119376022336963068</id><published>2008-07-14T16:28:00.000-07:00</published><updated>2008-07-14T16:58:05.273-07:00</updated><title type='text'>ALTER TABLE, SELECT AND INNODB</title><content type='html'>Let's assume you have a 512MB table, and you decide to alter the table to add an index to make queries faster.&lt;br /&gt;&lt;br /&gt;How long would you expect this alter to take? Hours? Days? &lt;br /&gt;&lt;br /&gt;Even with 7200 RPM-slow disks the alter should have finished in less than 1/2 hour.&lt;br /&gt;&lt;br /&gt;I ran across an alter that was running for 4 days-on 512MB datasize. The reason why it ran so long is because there was a SELECT that was running preventing mySQL from performing "rename table", the last leg of the ALTER TABLE process.&lt;br /&gt;&lt;br /&gt;Killing that SELECT released the shared lock allowing the alter to finish.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Do not KILL THE ALTER when stuck in this SHARED LOCK STATE; Do NOT then remove the temporary tablespace file "#sql-320f_106f99a2.*". &lt;br /&gt;&lt;br /&gt;What will happen if you do remove the #sql* file by hand?&lt;br /&gt;&lt;br /&gt;Well for one INNODB will crash the mysql instance saying it could not find the temporary table space. It failed to open it. 
Then on recovery the original table gets unlinked from the filesystem and you just lost all data for that tablespace.&lt;br /&gt;&lt;br /&gt;Why?&lt;br /&gt;&lt;br /&gt;Here are roughly the order of events for an alter:&lt;br /&gt;&lt;br /&gt;Lock all writes from said table&lt;br /&gt;Make a temporary table #sql - file&lt;br /&gt;Copy all data from the old file to the new file&lt;br /&gt;Do a quick consistency check between the two files&lt;br /&gt;unlink the old file&lt;br /&gt;rename the temp file into the old file name&lt;br /&gt;&lt;br /&gt;Each step operates on the data dictionary pointers for the two tables. Issuing a filesystem rm command for the step right before unlink, will cause INNODB to crash and on recovery unlink the old file and of course fail on the rename.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3119376022336963068?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3119376022336963068/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3119376022336963068' title='9 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3119376022336963068'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3119376022336963068'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/07/alter-table-select-and-innodb.html' title='ALTER TABLE, SELECT AND INNODB'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7197363042031964677</id><published>2008-07-07T18:36:00.000-07:00</published><updated>2009-01-07T18:49:05.437-08:00</updated><title type='text'>Group by ORDER by Optimization part II</title><content type='html'>&lt;a href="http://mysqldba.blogspot.com/2007/09/mysql-optimizer-and-your-applications.html"&gt;In my previous blog post&lt;/a&gt; I talk about GROUP BY and ORDER BY optimizations. A member asked a great question that I'd like to share with everyone.&lt;br /&gt;&lt;br /&gt;&lt;i&gt;&lt;br /&gt;But what if the query was:&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;SELECT c1, c2, c3, SUM(c4) FROM T WHERE c1 = ? GROUP BY c2 ORDER BY c3 DESC LIMIT 10;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;/i&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;That query would produce a temp table and a filesort.&lt;br /&gt;explain  SELECT c1, c2, c3, SUM(c4) FROM column_test WHERE c1 = 1 GROUP BY c2 ORDER BY c3 DESC LIMIT 10\G&lt;br /&gt;*************************** 1. row ***************************&lt;br /&gt;           id: 1&lt;br /&gt;  select_type: SIMPLE&lt;br /&gt;        table: column_test&lt;br /&gt;         type: ref&lt;br /&gt;possible_keys: c1&lt;br /&gt;          key: c1&lt;br /&gt;      key_len: 5&lt;br /&gt;          ref: const&lt;br /&gt;         rows: 1&lt;br /&gt;        Extra: Using where; Using index; Using temporary; Using filesort&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The reason the index is c1,c2,c3,c4&lt;br /&gt;&lt;br /&gt;So where c1=? and the group by of c2 would use that index, but to order the data properly you would need to do&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;WHERE c1 = ? 
GROUP BY c2, c3 ORDER BY c1 DESC, c2 DESC, c3 DESC&lt;br /&gt;&lt;br /&gt;explain  SELECT c1, c2, c3, SUM(c4) FROM column_test WHERE c1 = 1 GROUP BY c2,c3 ORDER BY c1, c2\G&lt;br /&gt;&lt;br /&gt;*************************** 1. row ***************************&lt;br /&gt;           id: 1&lt;br /&gt;  select_type: SIMPLE&lt;br /&gt;        table: column_test&lt;br /&gt;         type: ref&lt;br /&gt;possible_keys: c1&lt;br /&gt;          key: c1&lt;br /&gt;      key_len: 5&lt;br /&gt;          ref: const&lt;br /&gt;         rows: 1&lt;br /&gt;        Extra: Using where; Using index&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;br /&gt;to get rid of the temp table or filesort. Filesorts and temp tables takes about 50% of the query time-so avoid these when the query is requested at a huge rate.&lt;br /&gt;&lt;br /&gt;Also note that to get rid of the temporary table and filesort, the query changed and does not answer your question without post processing the data in PHP or some other layer.&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The reason: the data is ordered by the index. For innodb the entire table is ordered by the PRIMARY key, each index has a reference to. The mysql implementation of  group by does not know how to traverse and sort the data in the 1st pass of part of the key. The optimizer needs a lot of work. 
So, what is done from the mysql level is to automatically create a temp table and sort on that instead of using the index which it already traversed.&lt;br /&gt;&lt;br /&gt;I believe in 5.1 that this case is being worked on in the optimizer level to get rid of this common slowdown.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7197363042031964677?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7197363042031964677/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7197363042031964677' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7197363042031964677'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7197363042031964677'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/07/group-by-order-by-optimization-part-ii.html' title='Group by ORDER by Optimization part II'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2104805576656335650</id><published>2008-06-11T13:53:00.001-07:00</published><updated>2008-06-16T21:22:43.413-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='group by'/><category scheme='http://www.blogger.com/atom/ns#' term='index'/><category scheme='http://www.blogger.com/atom/ns#' term='order by'/><category scheme='http://www.blogger.com/atom/ns#' 
term='optimization'/><category scheme='http://www.blogger.com/atom/ns#' term='left-most-prefix'/><title type='text'>How to pick indexes for order by and group by queries</title><content type='html'>First some of the things that you need to use and understand&lt;br /&gt;&lt;br /&gt;&lt;a href="http://dev.mysql.com/doc/refman/5.0/en/using-explain.html"&gt;Explain Syntax&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://dev.mysql.com/doc/refman/5.0/en/order-by-optimization.html"&gt;Order by Optimization&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://dev.mysql.com/doc/refman/5.0/en/group-by-optimization.html"&gt;Group by Optimization&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Update: Updated errors.&lt;br /&gt;&lt;br /&gt;Now some details that are usually missed. GROUP BY does sorting unless you tell mysql not to. GROUP BY has two optimization methods, loose index scan, and tight index scan.&lt;br /&gt;&lt;br /&gt;Loose index scan, scans the entire table index, while tight index scan uses some sort of constraint. For large datasets that are accessed often and require some sort of group by, tight index scans are better.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So how to pick columns to create the optimal indexes. Here is a list of practices (rules) that I personally follow:&lt;br /&gt;&lt;br /&gt;1. What is the question asking?&lt;br /&gt;2. What current indexes are on the table?&lt;br /&gt;3. Can the query be re-written to use an existing index?&lt;br /&gt;4. What is the overhead of adding a new index?&lt;br /&gt;5. &lt;a href="http://mysqldba.blogspot.com/2006/08/indexes-optimizer-and-doing-efficent.html"&gt;Follow left-most prefix rules&lt;/a&gt;&lt;br /&gt;6. Build indexes that remove filesorts and temporary tables, for all query types.&lt;br /&gt;7. Use statements that reduce the rows examined, and keep each index small. 
Note: each secondary index in INNODB requires its own page.&lt;br /&gt;&lt;br /&gt;So lets look at a table&lt;br /&gt;&lt;br /&gt;T(c1,c2,c3,c4)&lt;br /&gt;PRIMARY(c1,c2);&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;A question asking&lt;br /&gt;&lt;br /&gt;SELECT c1, c2, c3, SUM(c4) FROM T WHERE c1 = ? GROUP BY c1,c2,c3 ORDER BY c3 DESC LIMIT 10;&lt;br /&gt;&lt;br /&gt;Instinct is to add an INDEX&lt;br /&gt;&lt;br /&gt;IDX(c1,c2,c3,c4); Since c1 is the constraint, grouped by c2,c3 SUMMING c4.&lt;br /&gt;&lt;br /&gt;BUT when running this condition through explain you'll see that in the EXTRA column&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;Using where; Using temporary; Using filesort&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;This is bad, it means WHERE is constraining the clause but the GROUP BY and ORDER BY is producing a temporary table, and a second pass to sort the data.&lt;br /&gt;&lt;br /&gt;So your query time is find the rows - put them in temp table which can hit disk, sort them randomly tickling a few thread based buffers. Queries such as these do not scale and can hog up memory.&lt;br /&gt;&lt;br /&gt;Let's breakdown what the question is asking.&lt;br /&gt;&lt;br /&gt;Give me all the results for c1 where c1 == ?, flatten the results, ORDER the results by the MAX of c3, SUM c4.&lt;br /&gt;&lt;br /&gt;Now that we know what the question is asking, lets "re-word" the question&lt;br /&gt;&lt;br /&gt;SELECT c1, c2, c3, SUM(c4) FROM T WHERE c1 = ? GROUP BY c2, c3 ORDER BY c3 DESC, c2 DESC LIMIT 10;&lt;br /&gt;&lt;br /&gt;This says&lt;br /&gt;&lt;br /&gt;Give me all the results for c1 where c1 ==?, flatten the results by the MAX of c3, SUM c4 and if c3.rowN-1 == c3.rowN the tie breaker will be c2.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;This is the same asking question reworded. 
Since GROUP BY AND ORDER BY is sorting the MAX of c3-c2 column refers to the same c2 values as the original query output.&lt;br /&gt;&lt;br /&gt;The IDX is now&lt;br /&gt;&lt;br /&gt;IDX(c1,c3,c2) and explain produces an extra of &lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;Using WHERE&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;This is still not good enough-since there is an additional seek to return the columns asked for. Also SUM is being done on the data and not on the IDX.&lt;br /&gt;&lt;br /&gt;To fix this IDX should be&lt;br /&gt;&lt;br /&gt;IDX(c1,c3,c2,c4) and now explain produces an extra of&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;Using WHERE; Using index.&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;What does Using index mean? This means that mysql will not have to do an additional seek to read the actual row.&lt;br /&gt;&lt;br /&gt;Now group by and order by have been optimized with adding an additional key.&lt;br /&gt;&lt;br /&gt;How did I know to re-word the question and it would produce the same result?&lt;br /&gt;Lets look at order by: That is the ending statement that changes the data. 
So, since c3 is also used to flatten the constraint I knew that c3 is all that is needed to refer to the same rows, c2 is used to be a tie-breaker when c3RowN-1 == c3RownN.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;These are the sort of hints you can use to optimize SQL.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2104805576656335650?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2104805576656335650/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2104805576656335650' title='7 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2104805576656335650'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2104805576656335650'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/06/how-to-pick-indexes-for-order-by-and.html' title='How to pick indexes for order by and group by queries'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>7</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6634698650893184527</id><published>2008-05-20T17:48:00.000-07:00</published><updated>2008-05-21T23:43:24.200-07:00</updated><title type='text'>How to cause deadlocks in INNODB and fix them</title><content type='html'>So imagine you have a table and for every row it contains an INT that represents a user, a medium int that represents an activity number, and a ENUM that represents 
state.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt;CREATE TABLE UserStack (&lt;br /&gt;id int unsigned not null default 0,&lt;br /&gt;activity mediumint unsigned not null default 0,&lt;br /&gt;state ENUM('ready','processing','processed', 'error'),&lt;br /&gt;PRIMARY KEY(id),&lt;br /&gt;INDEX (state, activity)&lt;br /&gt;) ENGINE = INNODB;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now you have an application that is spread across many servers which pops a set of users off the stack and sets them to a processing state.&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt;START TRANSACTION&lt;br /&gt;&lt;br /&gt;SELECT * FROM UserStack WHERE state='ready' ORDER BY activity DESC LIMIT 10 FOR UPDATE&lt;br /&gt;&lt;br /&gt;foreach id &lt;br /&gt;   &lt;br /&gt;   Mark them Processing&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;If multiple threads do this at the same time; the 1st thread will get the 10 ids while 99% of the others will fail with "Deadlock Detected try restarting transaction"&lt;br /&gt;&lt;br /&gt;The reason: from innodb's perspective many different clients are asking to perform actions on the same data all at the same time-thus a deadlock is detected. To exacerbate the problem the data is ordered differently from the cluster index-so in essence the entire table is scanned for this example. The table has 30 million rows.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I've tested transaction isolation levels:&lt;br /&gt;&lt;br /&gt;READ COMMITTED&lt;br /&gt;REPEATABLE READ&lt;br /&gt;SERIALIZABLE&lt;br /&gt;&lt;br /&gt;How to get around this:&lt;br /&gt;&lt;br /&gt;Let's add a column to make the rows unique for the calling process; let's add pid (add server int unsigned if you want to run the process from many servers).&lt;br /&gt;&lt;br /&gt;The access pattern for the table is now going to involve pid and state. 
Neither the column pid nor its combination with state is unique, so the user id, which identifies the user, is added to the key to make it unique.&lt;br /&gt;&lt;br /&gt;ALTER TABLE UserStack DROP PRIMARY KEY, ADD pid int unsigned NOT NULL DEFAULT 0, ADD PRIMARY KEY(state,pid,id), DROP INDEX state;&lt;br /&gt;&lt;br /&gt;Then change SELECT ... FOR UPDATE to an UPDATE statement. UPDATE operations for some reason are better at concurrency than SELECT FOR UPDATE. The update scans the PRIMARY index and updates the selected amount of rows defined by the LIMIT.&lt;br /&gt;&lt;br /&gt;UPDATE UserStack SET pid=getmypid(), state='processing' WHERE pid = 0 AND state='ready' &lt;span style="font-weight:bold;"&gt;LIMIT 10&lt;/span&gt;;&lt;br /&gt;&lt;br /&gt;SELECT * FROM UserStack WHERE pid = getmypid() AND state='processing';&lt;br /&gt;&lt;br /&gt;foreach user &lt;br /&gt;  process&lt;br /&gt;  mark as completed&lt;br /&gt;&lt;br /&gt;Rinse and repeat.&lt;br /&gt;&lt;br /&gt;The desired effect is complete. Each thread can grab its own work and each thread is guaranteed a unique user or block of users to process. 
The act of marking rows  (marking their territory) and grabbing the marked rows takes a fraction of seconds as it should.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;A good write ups on deadlocks&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.xaprb.com/blog/2006/08/03/a-little-known-way-to-cause-a-database-deadlock/"&gt;a-little-known-way-to-cause-a-database-deadlock&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6634698650893184527?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6634698650893184527/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6634698650893184527' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6634698650893184527'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6634698650893184527'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/05/how-to-cause-deadlocks-in-innodb-and.html' title='How to cause deadlocks in INNODB and fix them'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2815341484019475741</id><published>2008-05-13T10:59:00.000-07:00</published><updated>2008-05-13T11:23:33.123-07:00</updated><title type='text'>Net Settings mySQL &amp; Memcache</title><content type='html'>Ever see this&lt;br /&gt;&lt;blockquote&gt;TCP: drop open request from 
10.209.23.142/43407&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Well let's start with a more specific example:&lt;br /&gt;Memcache is tightly coupled in your code: Every request caches the response from the database so a lot of quick calls to memcache are made. Then you start adding full HTML to memcache instead of just caching the raw data; so now your load pattern is bigger blobs of data still at a high request rate.&lt;br /&gt;&lt;br /&gt;Now suddenly the memcache port hangs-you verify this by ssh to the box and then telnetting to the memcache box port 11211 and see that ssh works (port 22) yet 11211 does not. As a result all your front ends fall over because they are hanging on the memcache port.&lt;br /&gt;&lt;br /&gt;THIS IS NOT A MEMCACHE PROBLEM. It's a kernel problem. Default installs of Linux set the TCP window buffer size to a desktop setting and not a server setting.&lt;br /&gt;&lt;br /&gt;So I run this script.&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;#!/bin/bash&lt;br /&gt;&lt;br /&gt;sysctl -w net/core/rmem_max=8738000&lt;br /&gt;sysctl -w net/core/wmem_max=6553600&lt;br /&gt;&lt;br /&gt;sysctl -w net/ipv4/tcp_rmem="8192 873800 8738000"&lt;br /&gt;sysctl -w net/ipv4/tcp_wmem="4096 655360 6553600"&lt;br /&gt;sysctl -w vm/min_free_kbytes=65536&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;I found this out by going &lt;a href="http://www.acc.umu.se/~maswan/linux-netperf.txt"&gt;here&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;This is the first kernel setting that I have seen make a real big difference.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;So my list of changes so far to the kernel default settings are (getting lazy in detail)&lt;br /&gt;&lt;br /&gt;vm.swappiness=0&lt;br /&gt;run the deadline scheduler&lt;br /&gt;&lt;br /&gt;On the filesystem side&lt;br /&gt;&lt;br /&gt;mount the datadir noatime&lt;br /&gt;use O_DIRECT&lt;br /&gt;if you have cache on a hardware raid card set the cache for writes (make sure you have a BBC)&lt;br /&gt;use Raid-10 or if you 
have the money + can take a hit on I/O RAID-6&lt;br /&gt;stripe size 128-256K&lt;br /&gt;&lt;br /&gt;I have some other tweaks that I'm forgetting but when I find them I'll post them.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2815341484019475741?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2815341484019475741/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2815341484019475741' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2815341484019475741'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2815341484019475741'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/05/net-settings-mysql-memcache.html' title='Net Settings mySQL &amp; Memcache'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-642371888354597972</id><published>2008-05-08T13:42:00.000-07:00</published><updated>2008-05-08T13:55:34.559-07:00</updated><title type='text'>UAE Broken Proxys - how to work around them</title><content type='html'>In the UAE there is a government enforcement of all traffic to go through government proxy servers.  All requests are proxied and cached. What does this mean in the mysql world? Imagine you have a web application that records comments to a media object. 
Then a single person from the UAE makes a comment to that media object. The UAE Cache Proxy caches the HTTP-POST. Now every person in the country that visits your domain causes an HTTP-POST to that same media object. From an outside view it looks like content is being added to the system and the numbers are good. But ALL THE CONTENT is the SAME which is not valuable to the media owner nor the DBA who has to manage that data due to the BROKEN-CACHE-PROXY.&lt;br /&gt;&lt;br /&gt;So how do you bust the BROKEN-CACHE-PROXY? &lt;br /&gt;&lt;br /&gt;A simple method is to look at the contents of the last few posts and see if the same data is being added prior to insert-but this can be expensive at high scale-the scale that I deal with (billions of transactions per day).&lt;br /&gt;&lt;br /&gt;A simple approach is to just add a hidden variable to the POST with your webserver's time. If your webserver's time on the post &gt;= hidden variable time + 120 seconds then you know it's a bad post.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-642371888354597972?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/642371888354597972/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=642371888354597972' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/642371888354597972'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/642371888354597972'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/05/uae-broken-proxys-how-to-work-around.html' title='UAE Broken Proxys - how to work around them'/><author><name>Dathan 
Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2389813508359969544</id><published>2008-05-05T12:27:00.000-07:00</published><updated>2008-05-05T12:30:38.171-07:00</updated><title type='text'>I just pre-ordered my High Performance MySQL: Optimization, Backups, Replication, and More</title><content type='html'>Did you pre-order yours?&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.amazon.com/gp/product/0596101716?ie=UTF8&amp;tag=perinc-20&amp;linkCode=as2&amp;camp=1789&amp;creative=9325&amp;creativeASIN=0596101716"&gt; Pre-Order yours today&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Peter and Byron are really smart guys and very methodical in their tests to make sure the conclusions produced are rock solid. 
I don't know whats in the book, but if these guys made it, its going to be good-that's how much faith I have.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2389813508359969544?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2389813508359969544/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2389813508359969544' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2389813508359969544'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2389813508359969544'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/05/i-just-pre-ordered-my-high-performance.html' title='I just pre-ordered my High Performance MySQL: Optimization, Backups, Replication, and More'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5117314473788381957</id><published>2008-05-01T17:36:00.000-07:00</published><updated>2008-05-01T17:50:33.519-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='linux'/><category scheme='http://www.blogger.com/atom/ns#' term='cache'/><category scheme='http://www.blogger.com/atom/ns#' term='system'/><category scheme='http://www.blogger.com/atom/ns#' term='memory'/><title type='text'>Linux 64-bit,  MySQL,  Swap and Memory</title><content type='html'>The VM for Linux prefers system 
cache over application memory. What does this mean? The best way I can explain is by example.&lt;br /&gt;&lt;br /&gt;Imagine you have 32 GB of RAM&lt;br /&gt;MySQL is set to take 20 GB of RAM for a process based buffer and up to 6M for the various thread buffers.&lt;br /&gt;&lt;br /&gt;Over a period of time the box swaps. The only thing that is running is mysql and its memory size is around 21GB for resident memory. Why does swap grow when there is plenty of memory? The reason is when a memory alloc is needed (thread based buffer is tickled) the VM will choose to use swap over allocating from the system cache, when there is not enough free memory.&lt;br /&gt;&lt;br /&gt;DO NOT TURN OFF SWAP to prevent this. Your box will crawl, kswapd will chew up a lot of the processor, Linux needs swap enabled, lets just hope its not used.&lt;br /&gt;&lt;br /&gt;So how do you stop Nagios pages because of swap usage? Well if you have a few choices.&lt;br /&gt;&lt;br /&gt;reboot the box&lt;br /&gt;&lt;br /&gt;or &lt;br /&gt;&lt;br /&gt;stop mysql &amp;&amp; swapoff -a;swapon -a;&lt;br /&gt;&lt;br /&gt;or just&lt;br /&gt;&lt;br /&gt;swapoff -a;swapon -a;&lt;br /&gt;&lt;br /&gt;Doing the latter command is rather scary and fun at the same time. Because you can either crash mysql or not. I just did the swap* commands live, I was very certain nothing was using swap and it worked. 
YAY no more pages and I didn't have to shut down the service!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5117314473788381957?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5117314473788381957/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5117314473788381957' title='7 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5117314473788381957'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5117314473788381957'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/05/linux-64-bit-mysql-swap-and-memory.html' title='Linux 64-bit,  MySQL,  Swap and Memory'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>7</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6392128107776814352</id><published>2008-04-23T18:21:00.000-07:00</published><updated>2008-04-23T18:35:07.020-07:00</updated><title type='text'>Avoid storing Markup (HTML) in the database</title><content type='html'>I see this to often; Storing HTML in the database. Then UI wants to change the HTML, but the data grew to 100 GB, so the really only feasible way to change the HTML is via a post process after the database fetch. This post process produces a huge list of preg_replace statements to rebuild the HTML on display. 
This consumes a lot of memory over time and slows down the APP, plus it's time-consuming to debug.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Store URLS if the apps need to. Or better yet build the schema to store the bare minimum the app needs to generate the HTML.&lt;br /&gt;&lt;br /&gt;Here is a compelling reason why not to store HTML:&lt;br /&gt;&lt;br /&gt;The data needed to generate the HTML is 10 bytes, but the TEXT field consumes 1024 bytes, all of which is the same text.&lt;br /&gt;&lt;br /&gt;Thus an app that should cost very little to maintain now costs 10 times as much.&lt;br /&gt;&lt;br /&gt;If you're building a search engine strip out the HTML and store the text. If it's a quick app, take this saying into account&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;There is nothing more permanent than a temporary solution&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;An exception that I don't mind: php serialize - this is markup that rarely changes.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6392128107776814352?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6392128107776814352/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6392128107776814352' title='7 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6392128107776814352'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6392128107776814352'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/04/avoid-storing-markup-html-in-database.html' title='Avoid storing Markup (HTML) in the database'/><author><name>Dathan 
Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>7</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7373899529144362019</id><published>2008-04-22T00:43:00.000-07:00</published><updated>2008-04-22T00:44:55.511-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='mysqluc'/><category scheme='http://www.blogger.com/atom/ns#' term='mysqluc08'/><category scheme='http://www.blogger.com/atom/ns#' term='lots of writes'/><category scheme='http://www.blogger.com/atom/ns#' term='federation'/><title type='text'>mySQL uc2008 presentation</title><content type='html'>&lt;div style="display:none"&gt;&lt;script&gt;document.write('&lt;noscript&gt;');&lt;/script&gt;&lt;/div&gt; &lt;object codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0" id="embedded_flash_2594652_qm9l6_object" name="embedded_flash_2594652_qm9l6_object" classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000" align="middle" height="500" width="100%"&gt;  &lt;param name="movie" value="http://documents.scribd.com/ScribdViewer.swf?document_id=2594652&amp;access_key=key-2lhvp9qimui82cn2928t&amp;page=&amp;version=1"&gt;   &lt;param name="quality" value="high"&gt;   &lt;param name="play" value="true"&gt;  &lt;param name="loop" value="true"&gt;   &lt;param name="scale" value="showall"&gt;  &lt;param name="wmode" value="opaque"&gt;   &lt;param name="devicefont" value="false"&gt;  &lt;param name="bgcolor" value="#ffffff"&gt;   &lt;param name="menu" value="true"&gt;  &lt;param name="allowFullScreen" value="true"&gt;   &lt;param name="allowScriptAccess" value="always"&gt;   &lt;param name="salign" value=""&gt;  &lt;embed 
src="http://documents.scribd.com/ScribdViewer.swf?document_id=2594652&amp;access_key=key-2lhvp9qimui82cn2928t&amp;page=&amp;version=1" quality="high" pluginspage="http://www.macromedia.com/go/getflashplayer" play="true" loop="true" scale="showall" wmode="opaque" devicefont="false" bgcolor="#ffffff" name="embedded_flash_2594652_qm9l6_object" menu="true" allowfullscreen="true" allowscriptaccess="always" salign="" type="application/x-shockwave-flash" align="middle" height="500" width="100%"&gt;&lt;/embed&gt; &lt;/object&gt;&lt;div style="display:none"&gt; &lt;/noscript&gt; &lt;script type="text/javascript" src='http://www.scribd.com/javascripts/view.js'&gt;&lt;/script&gt;&lt;/div&gt;&lt;div id='embedded_flash_2594652_qm9l6' style="width:100%;height:100%"&gt;&lt;span style="display:none"&gt;Read this doc on Scribd: &lt;a href="http://www.scribd.com/doc/2594652/Record-every-Referral-for-Flickr-Realtime"&gt;Record every Referral for Flickr Realtime&lt;/a&gt;&lt;/span&gt; &lt;/div&gt; &lt;div style="display:none"&gt;&lt;script type="text/javascript"&gt;  var scribd_doc = new scribd.Document(2594652, 'key-2lhvp9qimui82cn2928t');       scribd_doc.write('embedded_flash_2594652_qm9l6');&lt;/script&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7373899529144362019?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7373899529144362019/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7373899529144362019' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7373899529144362019'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7373899529144362019'/><link rel='alternate' 
type='text/html' href='http://mysqldba.blogspot.com/2008/04/mysql-uc2008-presentation.html' title='mySQL uc2008 presentation'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-768298106700504326</id><published>2008-04-21T16:13:00.000-07:00</published><updated>2008-04-22T00:46:04.866-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='mysqluc'/><category scheme='http://www.blogger.com/atom/ns#' term='mysql'/><category scheme='http://www.blogger.com/atom/ns#' term='2007'/><category scheme='http://www.blogger.com/atom/ns#' term='presentation'/><category scheme='http://www.blogger.com/atom/ns#' term='federation'/><title type='text'>mySQL UC 2007 Presentation File</title><content type='html'>&lt;a href="http://www.scribd.com/doc/2592098/DVPmysqlucFederation-at-Flickr-Doing-Billions-of-Queries-Per-Day"&gt;Doing Billions of Queries per Day&lt;/a&gt;&lt;br /&gt;&lt;div style="display:none"&gt;&lt;script&gt;document.write('&lt;noscript&gt;');&lt;/script&gt;&lt;/div&gt; &lt;object codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0" id="embedded_flash_2592098_iz99z_object" name="embedded_flash_2592098_iz99z_object" classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000" align="middle" height="500" width="100%"&gt;  &lt;param name="movie" value="http://documents.scribd.com/ScribdViewer.swf?document_id=2592098&amp;access_key=key-26uujjyeg1djsea1tgcs&amp;page=&amp;version=1"&gt;   &lt;param name="quality" value="high"&gt;   &lt;param name="play" value="true"&gt;  &lt;param name="loop" value="true"&gt;   &lt;param name="scale" value="showall"&gt;  &lt;param name="wmode" 
value="opaque"&gt;   &lt;param name="devicefont" value="false"&gt;  &lt;param name="bgcolor" value="#ffffff"&gt;   &lt;param name="menu" value="true"&gt;  &lt;param name="allowFullScreen" value="true"&gt;   &lt;param name="allowScriptAccess" value="always"&gt;   &lt;param name="salign" value=""&gt;  &lt;embed src="http://documents.scribd.com/ScribdViewer.swf?document_id=2592098&amp;access_key=key-26uujjyeg1djsea1tgcs&amp;page=&amp;version=1" quality="high" pluginspage="http://www.macromedia.com/go/getflashplayer" play="true" loop="true" scale="showall" wmode="opaque" devicefont="false" bgcolor="#ffffff" name="embedded_flash_2592098_iz99z_object" menu="true" allowfullscreen="true" allowscriptaccess="always" salign="" type="application/x-shockwave-flash" align="middle" height="500" width="100%"&gt;&lt;/embed&gt; &lt;/object&gt;&lt;div style="display:none"&gt; &lt;/noscript&gt; &lt;script type="text/javascript" src='http://www.scribd.com/javascripts/view.js'&gt;&lt;/script&gt;&lt;/div&gt;&lt;div id='embedded_flash_2592098_iz99z' style="width:100%;height:100%"&gt;&lt;span style="display:none"&gt;Read this doc on Scribd: &lt;a href="http://www.scribd.com/doc/2592098/DVPmysqlucFederation-at-Flickr-Doing-Billions-of-Queries-Per-Day"&gt;DVPmysqlucFederation at Flickr: Doing Billions of Queries Per Day&lt;/a&gt;&lt;/span&gt; &lt;/div&gt; &lt;div style="display:none"&gt;&lt;script type="text/javascript"&gt;  var scribd_doc = new scribd.Document(2592098, 'key-26uujjyeg1djsea1tgcs');       scribd_doc.write('embedded_flash_2592098_iz99z');&lt;/script&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-768298106700504326?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/768298106700504326/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=768298106700504326' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/768298106700504326'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/768298106700504326'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/04/mysql-uc-2007-presentation-file.html' title='mySQL UC 2007 Presentation File'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8206127428628966852</id><published>2008-04-21T13:39:00.001-07:00</published><updated>2008-04-21T15:11:56.932-07:00</updated><title type='text'>Common Steps to Scale Linearly</title><content type='html'>Whenever I work at a place I do the following.&lt;br /&gt;&lt;br /&gt;Get a rundown of what the application is, what its demands are, what does the company expect the application to be a year from now - like how many users are going to use the application. 10 million, 20 million, 100 million?&lt;br /&gt;&lt;br /&gt;Then I find all the slowdowns:&lt;br /&gt;- What are the my.cnf settings?&lt;br /&gt;- What are the most active tables?&lt;br /&gt;- What type of SQL is being used?&lt;br /&gt;- How is the data accessed?&lt;br /&gt;- Who/What owns the data?&lt;br /&gt;- What is the Read-Write Ratio?&lt;br /&gt;- How many servers are used now to handle the site load, and how many are needed within a few months.&lt;br /&gt;- What is the reads per second, connections per second, writes per second&lt;br /&gt;- How does the data grow? 
MxN, MxNxO, N^4 etc.&lt;br /&gt;&lt;br /&gt;Once I get this down (a few days) then I change everything :)&lt;br /&gt;&lt;br /&gt;If the data is small and doesn't change often I don't bother federating that at 1st. I go for the meat of the product. My goal is to run mean, lean, cheap, fast, and easy to maintain. I love my sleep.&lt;br /&gt;&lt;br /&gt;So steps on federating:&lt;br /&gt;&lt;br /&gt;What is the main object?&lt;br /&gt;What are the mappings to this main object?&lt;br /&gt;Spread data out by this main object&lt;br /&gt;Cache lookups to the pointer where the main object data is.&lt;br /&gt;Build everything around the main object(s).&lt;br /&gt;Use a versioning system&lt;br /&gt;Document a global view of how things work, and make cookbooks-so someone else can wake up in the middle of the night. I love my sleep :)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;At the same time get&lt;br /&gt;&lt;br /&gt;dsh working&lt;br /&gt;nagios working&lt;br /&gt;ganglia working&lt;br /&gt;custom tools working&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;What you have is an easy to use, maintained system that scales linearly as long as that main object is being referenced.&lt;br /&gt;&lt;br /&gt;Above is the easy stuff. The time-consuming part of the procedure is rewriting all the code to work for old and new, and migrate the new. 
This is needed to make sure no one is affected by upgrading to the new system.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8206127428628966852?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8206127428628966852/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8206127428628966852' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8206127428628966852'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8206127428628966852'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/04/common-steps-to-scale-linearly.html' title='Common Steps to Scale Linearly'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6863210369167529407</id><published>2008-04-14T10:18:00.000-07:00</published><updated>2008-04-14T10:30:07.705-07:00</updated><title type='text'>Take a Vacation</title><content type='html'>Are you a mySQL DBA that will not take a vacation  because you're the only person to fix a disaster if one occurs? Do you take a vacation and stay at home with your pager on you? Ask yourself when was the last time you went someplace for 9 days without a pager or computer? 
If work is more important to you than your own personal time, then it's time to take a vacation.&lt;br /&gt;&lt;br /&gt;I just took a 9 day vacation in Hawaii, 4 days on the Big Island and the rest on Oahu. Man I haven't been this rested in years. I feel like a new man! Prior to my vacation I felt run-down and I was just going through the paces, now I'm rejuvenated-the ocean waves and hot air have given me new ideas on how to design back-end database data layouts, how to scale to new levels at less cost. I feel like I did when I graduated college-ready to take on the world-but better!&lt;br /&gt;&lt;br /&gt;Expect many new posts! Because the original Dathan is back better than evar!&lt;br /&gt;&lt;br /&gt;Hang-Loose Brutha!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6863210369167529407?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6863210369167529407/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6863210369167529407' title='9 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6863210369167529407'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6863210369167529407'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/04/take-vacation.html' title='Take a Vacation'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7806066497656238517</id><published>2008-03-25T18:27:00.001-07:00</published><updated>2008-03-25T18:36:35.788-07:00</updated><title type='text'>I write DB scripts in PHP</title><content type='html'>Lots of people like PERL, Python, Java or C to write database backfills, data repair, etc. I like writing in all these languages but what I don't like doing is writing the same code over and over again, so I write my DB code in the same language as the environment that I'm using. This means I write backend DB scripts in PHP, I get to reuse common DB paths, classes and functions and improve things when I see them.&lt;br /&gt;&lt;br /&gt;But, PHP if not written correctly will use up 2GB of memory easily from loosing reference to arrays or setting globals and forgetting about them; stuff like that. This is especially visible in long running applications.&lt;br /&gt;&lt;br /&gt;PEAR is not immune to these memory leaks. 
So, to get around reference problems in PEAR I do&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;$skiptrace =&amp; PEAR::getStaticProperty('PEAR_Error', 'skiptrace');&lt;br&gt;&lt;br /&gt;$skiptrace = false;&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;This prevents this error &lt;br /&gt;PHP Fatal error:  Allowed memory size of 268435456 bytes exhausted (tried to allocate 76 bytes) in /usr/share/pear/PEAR.php on line 872&lt;br /&gt;&lt;br /&gt;from long running processes.&lt;br /&gt;&lt;br /&gt;Thought I pass it along.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7806066497656238517?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7806066497656238517/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7806066497656238517' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7806066497656238517'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7806066497656238517'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/03/i-write-db-scripts-in-php.html' title='I write DB scripts in PHP'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1212312764724395428</id><published>2008-03-25T14:46:00.000-07:00</published><updated>2008-03-25T18:37:58.662-07:00</updated><title type='text'>MySQL 2008 UC 
Presentation Scaling a HUGE volumn of concurrent writes</title><content type='html'>The details can be found &lt;br /&gt;&lt;a href="http://en.oreilly.com/mysql2008/public/schedule/detail/588"&gt;&lt;br /&gt;http://en.oreilly.com/mysql2008/public/schedule/detail/588&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I've since moved on from Flickr to a new Job, but Flickr is still allowing me to give this talk. Flickr is so cool! &lt;br /&gt;&lt;br /&gt;The talk encompasses capacity planning and scaling for a heavy concurrent write and read environment, and when it makes sense to split resources out to a single application.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1212312764724395428?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1212312764724395428/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1212312764724395428' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1212312764724395428'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1212312764724395428'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/03/mysql-2008-uc-presentation-scaling-huge.html' title='MySQL 2008 UC Presentation Scaling a HUGE volumn of concurrent writes'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7678123010827782132</id><published>2008-03-12T18:44:00.000-07:00</published><updated>2008-03-12T18:53:53.522-07:00</updated><title type='text'>mySQL PHP PEAR::DB</title><content type='html'>So, I just fixed a behavior in &lt;a href="http://pear.php.net/package/DB"&gt;PEAR::DB&lt;/a&gt;. In isManip, a regular expression is called on every query that goes through the PEAR::DB layer. That sucks btw, but the purpose of function is to tell the classes that inherit from DB that the query passed is a query that affects data.&lt;br /&gt;&lt;br /&gt;The code is as follows &lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt; function isManip($query)&lt;br /&gt;    {&lt;br /&gt;        $manips = 'INSERT|UPDATE|DELETE|REPLACE|'&lt;br /&gt;                . 'CREATE|DROP|'&lt;br /&gt;                . 'LOAD DATA|SELECT .* INTO .* FROM|COPY|'&lt;br /&gt;                . 'ALTER|GRANT|REVOKE|'&lt;br /&gt;                . 'LOCK|UNLOCK';&lt;br /&gt;        if (preg_match('/^\s*"?(' . $manips . ')\s+/i', $query)) {&lt;br /&gt;            return true;&lt;br /&gt;        }&lt;br /&gt;        return false;&lt;br /&gt;    }&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Then in mysql.php there is this&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt;function affectedRows()&lt;br /&gt;    {&lt;br /&gt;        if ($this-&gt;_last_query_manip) {&lt;br /&gt;            return @mysql_affected_rows($this-&gt;connection);&lt;br /&gt;        } else {&lt;br /&gt;            return 0;&lt;br /&gt;        }&lt;br /&gt;     }&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Which is not ideal IMHO. I changed mysql.php to just return the results of mysql_affected_rows which is better anyway IMHO.&lt;br /&gt;&lt;br /&gt;What DB wrapper do you use? 
What do you think about PDO?&lt;br /&gt;&lt;br /&gt;Personally I like to write my own, bare bone wrappers around mysql_* functions for php but to make things compatible I'm using PEAR::DB.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7678123010827782132?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7678123010827782132/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7678123010827782132' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7678123010827782132'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7678123010827782132'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/03/mysql-php-peardb.html' title='mySQL PHP PEAR::DB'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8706082529603912134</id><published>2008-03-04T10:46:00.000-08:00</published><updated>2008-03-04T11:05:40.775-08:00</updated><title type='text'>MySQL multi datacenter HOT / HOT BCP</title><content type='html'>In a &lt;a href="http://mysqldba.blogspot.com/2008/02/bcp-mysql-without-loosing-speed.html"&gt;previous post&lt;/a&gt; I explained about BCP. I have just finished my latest and nearly final test, and all worked as expected. 
For about an hour certain front end servers were hitting a database shard in 1 datacenter while the rest of the front ends hit the same shard in another datacenter.&lt;br /&gt;&lt;br /&gt;What makes this incredible is that now data from mySQL can be close to geographic locations of the end user, without having to make changes to the front end application. So fail over is silent from a database perspective if an entire datacenter is down. Actions outside of the application are also replicating seamlessly and in order. Latency is high but the goal is not to have a WWW in one datacenter talk to a shard in another datacenter. So, latency is in effect not an issue especially if one uses Akamai-DNS to geographically loadbalance your user base.&lt;br /&gt;&lt;br /&gt;This is a great simple solution that scales, and it only took 1 week to implement from the ground up.&lt;br /&gt;&lt;br /&gt;Other solutions that I have seen have a Proxy layer in front of the actual database that writes data to one datacenter and synchronously writes data to another datacenter. So, your entire transaction time is the SUM of the transaction, for each server plus the SUM of the latency to talk to another datacenter. Also if that "Proxy Layer" goes down, or the stunnel goes down the application goes down. This is not ideal for me.&lt;br /&gt;&lt;br /&gt;The solution which I designed and was implemented+tweaked by our master java engineer: removes these layers and makes the data transfer independent of the application. 
So, if that layer dies the application does not die-it just gets restarted and catches up to the latest events.&lt;br /&gt;&lt;br /&gt;I love when things work.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8706082529603912134?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8706082529603912134/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8706082529603912134' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8706082529603912134'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8706082529603912134'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/03/mysql-multi-datacenter-hot-hot-bcp.html' title='MySQL multi datacenter HOT / HOT BCP'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6237986001734241092</id><published>2008-03-03T11:32:00.000-08:00</published><updated>2008-03-03T13:54:25.030-08:00</updated><title type='text'>The oldest component is finally finished</title><content type='html'>I have finalized the design for Federating Connections and part of the design has been implemented so far with amazing results. Overnight the dependency on replication has been reduced. Feed updates no longer are lagging and query load doubled without the need for new hardware. 
&lt;br /&gt;&lt;br /&gt;Social connections (not mySQL connections) at Flickr in particular are directly responsible for permission levels. This allows members to see into another member's photostream. This global requirement means that every logged in page viewed on Flickr requires a database read  if the page is not the member's own page. So if the cluster is down, all access to photostreams default to the most restrictive state, i.e. public photos only.&lt;br /&gt;&lt;br /&gt;As a result, the service needs to be extremely responsive on reads, since possibly every page view on Flickr could produce a realtime query on the contact cluster. &lt;br /&gt;&lt;br /&gt;Next, the data has to be redundant and always available. This is very hard to do, when you have no spare servers, and only two servers to do this entire procedure-yet we did it. We recorded all photo permission change events, created the new method, backfilled and applied all change events to the new schema layout. &lt;br /&gt;&lt;br /&gt;Finally the new design allows for more features and more requests to the system with the ability to spread data across N servers.&lt;br /&gt;&lt;br /&gt;Two servers are all that is needed now, with the next phase spreading the data across more servers - with NO memcache or cache layer at all.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6237986001734241092?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6237986001734241092/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6237986001734241092' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6237986001734241092'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/31421954/posts/default/6237986001734241092'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/03/oldest-component-is-finally-finished.html' title='The oldest component is finally finished'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6570599176001425515</id><published>2008-02-25T12:37:00.000-08:00</published><updated>2008-02-25T13:05:40.108-08:00</updated><title type='text'>BCP mySQL without loosing speed</title><content type='html'>BCP stands for Business Continuity Planning - basically a fancy name to describe handling the situation when a DC goes offline. Since we use dual master replication for our servers - putting a master in another DC is not possible - without a special layer. The reason: A slave can have only one master, while a master can have many slaves. So, mysql replication for a dual master setup is not valid, unless there is a replication ring from one DC to another DC. You do not want to do this due to latency.&lt;br /&gt;&lt;br /&gt;To get around these limiting factors in mysql, we have developed an application, using my design, to get write events from one DC to another DC over a stunnel. &lt;br /&gt;&lt;br /&gt;To get data from one DC to another DC we need to do it encrypted. If there is a man in the middle, we don't want them sniffing our traffic-so we use stunnel to encrypt the data. I will not use SSL encrypted replication since managing that requires a mysql restart. 
For something as simple as replication data, we shouldn't have to restart mysql.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;stunnel setup&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.edna.narrabilis.com/2006/06/01/stunnel-for-mysql-server-and-client/"&gt; here is a good writeup&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;For the version of stunnel that we are running I needed to add this: wait_for_readable = 0 - which tells the accepting server NOT to wait for headers to be sent.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now to make sure it's fast, we need to transfer as little data as possible-I will not go into details of how this is implemented yet. If people are interested I may release a generic version of the application that will get data from one location to another location, consistent, ordered etc-which is application independent. The last statement means that the application does not need to change for this to work.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6570599176001425515?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6570599176001425515/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6570599176001425515' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6570599176001425515'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6570599176001425515'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/02/bcp-mysql-without-loosing-speed.html' title='BCP mySQL without loosing speed'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7771604687105754693</id><published>2008-02-01T15:02:00.000-08:00</published><updated>2008-02-01T15:46:42.164-08:00</updated><title type='text'>Adding Capacity is FUN</title><content type='html'>At Flickr adding capacity is easy. Since we are able to scale as a function of user growth, independent of hardware classes. This makes adding new hardware easy. I'm able to adjust the server weight on the FLY to give more users to a certain class of hardware over another.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Changing schema is easy as well and now I do it much safer. In the past I would turn off a master for each partitioned dataset all at the same time: then stay up for 20+ hours and execute an alter across the entire server farm, that was off line.&lt;br /&gt;&lt;br /&gt;Now I do it in stages and do it throughout the week. It's a little slower but I get to sleep. 
Additionally with the new method I am able to do more all at the same time-like rebuild the entire dataset for a partition.&lt;br /&gt;&lt;br /&gt;Here are my steps for doing a change that takes more than 10 hours:&lt;br /&gt;&lt;br /&gt;remove the servers from the site config&lt;br /&gt;push my.cnf.maintenance to the servers not taking on site traffic&lt;br /&gt; - this starts mysql on port 3307 and turns off replication and binary logging&lt;br /&gt; - this step is crucial to tell our backend bots not to do stuff on the servers in maintenance.&lt;br /&gt;&lt;br /&gt;mkdir -p /var/lib/mysql/restore&lt;br /&gt;restart mysql&lt;br /&gt;DUMP all data using mysqldump and SELECT *,[NEW FIELD DEFAULT VALUE1...VALUE-N] INTO OUTFILE '/var/lib/mysql/restore/[TABLE].txt' 8 processes at a time&lt;br /&gt;&lt;br /&gt;stop mysql&lt;br /&gt;remove all of innodb datafiles&lt;br /&gt;copy data with 8-20 processes to the filer to sustain 50MB a sec. Pretty nifty script&lt;br /&gt;umount /var/lib/mysql&lt;br /&gt;mkfs.ext3 -Tlargefile4 -LMYSQL [device]&lt;br /&gt;mount -a&lt;br /&gt;&lt;br /&gt;copy some of the data back (everything except the txt files)&lt;br /&gt;start mysql&lt;br /&gt;LOAD data in parallel about 8 processes at a time&lt;br /&gt;&lt;br /&gt;verify that utf8 data was loaded properly&lt;br /&gt;push my.cnf.production&lt;br /&gt;restart mysql&lt;br /&gt;wait about 45 mins for 12 hours of events to catch up.&lt;br /&gt;put it back into the site config&lt;br /&gt;&lt;br /&gt;I do these steps across 6-10 servers at a time-using a dsh like application&lt;br /&gt;&lt;br /&gt;Your question may be why are you rebuilding the filesystem and using ext3? Well EXT-3 is my only option and it fragments really bad-messing with performance. 
So if I am going to do a massive alter-I throw that in there for good measure-I plan on doing this once a year.&lt;br /&gt;&lt;br /&gt;I do not do this procedure for alters that last less then 10 hours, but if I alter Flickr's main table which holds most of the data-I take the opportunity to shrink all the datafiles.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7771604687105754693?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7771604687105754693/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7771604687105754693' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7771604687105754693'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7771604687105754693'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/02/adding-capacity-is-fun.html' title='Adding Capacity is FUN'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2532736229399593737</id><published>2008-01-30T11:39:00.000-08:00</published><updated>2008-01-30T11:43:29.178-08:00</updated><title type='text'>Flickr is Hiring for a DBA position!!</title><content type='html'>We're looking for an experienced and motivated MySQL DBA to help make things go at Flickr.&lt;br /&gt;&lt;br /&gt;We're looking for a DBA with strong LAMP 
background.&lt;br /&gt;&lt;br /&gt;Specific Duties:&lt;br /&gt;• Work with engineers on performance tuning, query optimization, index tuning.&lt;br /&gt;• Monitor databases for problems and to diagnose where those problems are.&lt;br /&gt;• Work with Senior DBA to maintain a scalable, reliable, and robust database environment.&lt;br /&gt;• Build database tools and scripts to automate where possible.&lt;br /&gt;• Support MySQL databases for production and development.&lt;br /&gt;• Provide 24x7 escalated on-call support on a pager rotation.&lt;br /&gt;&lt;br /&gt;Requirements &amp; Education:&lt;br /&gt;• 3-4+ years MySQL experience.&lt;br /&gt;• 2+ years of experience as a MySQL DBA in a high traffic, transactional environment.&lt;br /&gt;• 2+ years working in a LAMP environment, particularly PHP/MySQL combination.&lt;br /&gt;• Proficient with database performance strategies.&lt;br /&gt;• Proficient tuning MySQL processes and queries.&lt;br /&gt;• Experience in administration of InnoDB &lt;br /&gt;• Experience with MySQL Replication, with both Master-Slave and Master-Master replication.&lt;br /&gt;• Ability to work cooperatively with software engineers and system administrators.&lt;br /&gt;• Excellent communication skills&lt;br /&gt;• Exceptional problem-solving expertise and attention to detail.&lt;br /&gt;• BS in Computer Science or equivalent.&lt;br /&gt;&lt;br /&gt;Bonus Points For:&lt;br /&gt;• Experience with Data Sharding and federated architectures.&lt;br /&gt;• Experience with multi-datacenter MySQL replication.&lt;br /&gt;• Experience working in a social media environment.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Send me an email with your resume.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2532736229399593737?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' 
href='http://mysqldba.blogspot.com/feeds/2532736229399593737/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2532736229399593737' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2532736229399593737'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2532736229399593737'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/flickr-is-hiring-for-dba-position.html' title='Flickr is Hiring for a DBA position!!'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7419521447664254691</id><published>2008-01-24T13:44:00.000-08:00</published><updated>2008-01-24T14:08:22.562-08:00</updated><title type='text'>filefrag a DBA's Best Friend</title><content type='html'>EXT3 has performance problems as the filesystem gets fragmented-although this is counter intuitive to the design of EXT3-fragmentation really happens. &lt;br /&gt;&lt;br /&gt;Really there is no safe de-fragmenter tool out there for ext3. 
There is this &lt;a href="http://www2.lut.fi/~ilonen/ext3_fragmentation.html"&gt;one &lt;/a&gt; but I will not use it.&lt;br /&gt;&lt;br /&gt;Fragmentation can get so bad that performance from EXT-3 can drop 7 fold!!&lt;br /&gt;&lt;br /&gt;Here is &lt;a href="http://www.sabi.co.uk/Notes/linuxFS.html"&gt;some good info&lt;/a&gt; on fragmentation and a comparison of various other file systems.&lt;br /&gt;&lt;br /&gt;Now for filefrag, I use this to see how bad an innodb file is fragmented.&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;TABLE.ibd: 4020 extents found, perfection would be 298 extents&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Our wiz of a system administrator Kevin M., who I am teaching to become a mySQL DBA got me hooked on this utility: and he came up with a good method to fix this.&lt;br /&gt;&lt;br /&gt;cp TABLE.ibd to a new location&lt;br /&gt;rm TABLE.ibd&lt;br /&gt;cp TABLE.ibd from new location back to the Database Directory&lt;br /&gt;&lt;br /&gt;Tada fragmentation is nearly gone. 
FSCK will help as well.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7419521447664254691?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7419521447664254691/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7419521447664254691' title='6 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7419521447664254691'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7419521447664254691'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/filefrag-dbas-best-friend.html' title='filefrag a DBA&apos;s Best Friend'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>6</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-155079001059467250</id><published>2008-01-18T11:34:00.000-08:00</published><updated>2008-01-24T13:40:49.466-08:00</updated><title type='text'>O_DIRECT + EXT3 Update</title><content type='html'>A few days ago I wrote about &lt;a href="http://mysqldba.blogspot.com/2008/01/do-not-use-odirect-with-ext3.html"&gt;O_DIRECT + EXT3 not working&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;This is not a wide-spread problem, and may be isolated to 2.6.9. So, it makes sense that others who run O_DIRECT with EXT3 do not see the issue. 
I will use this post for future updates.&lt;br /&gt;&lt;br /&gt;Ok here is the research that I did, and found the cause of my O_DIRECT problem&lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;th&gt; RHEL Bug ID &lt;/th&gt;&lt;br /&gt;&lt;th&gt; Description &lt;/th&gt;&lt;br /&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;td&gt;161985&lt;/td&gt;&lt;br /&gt;&lt;td&gt;&lt;i&gt;O_DIRECT on RHEL v4 may not return correct number of bytes when concurrent I/O&lt;/i&gt;&lt;/td&gt;&lt;br /&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;td&gt;178084&lt;/td&gt;&lt;br /&gt;&lt;td&gt;&lt;i&gt;Last AIO read of a file opened with O_DIRECT returns wrong length&lt;/i&gt;&lt;/td&gt;&lt;br /&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;td&gt;178720&lt;/td&gt;&lt;br /&gt;&lt;td&gt;&lt;i&gt;O_DIRECT bug when reading last block of sparse file&lt;/i&gt;&lt;/td&gt;&lt;br /&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;td&gt;191736&lt;/td&gt;&lt;br /&gt;&lt;td&gt;&lt;i&gt;CVE-2004-2660 O_DIRECT write sometimes leaks memory&lt;/i&gt;&lt;br /&gt;&lt;/td&gt;&lt;/tr&gt;&lt;br /&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;So, I can't use O_DIRECT&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-155079001059467250?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/155079001059467250/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=155079001059467250' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/155079001059467250'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/155079001059467250'/><link rel='alternate' type='text/html' 
href='http://mysqldba.blogspot.com/2008/01/odirect-ext3-update.html' title='O_DIRECT + EXT3 Update'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7299630805059728971</id><published>2008-01-15T16:59:00.000-08:00</published><updated>2008-01-15T17:12:12.056-08:00</updated><title type='text'>MySQL support == AWSOME</title><content type='html'>Well, none of my issues have been fixed yet but MySQL support is on top of it.&lt;br /&gt;&lt;br /&gt;I've run into many S1 bugs: all at the same time. Support has been able to help me identify them. Some of them have proposed fixes, some fixes are being tested in&lt;br /&gt;5.0.54.&lt;br /&gt;&lt;br /&gt;MySQL is by far the best Open source Database on the planet-support reflects that fact. I highly recommend getting a support contract to trouble shoot issues that make it into production, less learning the entire mysql code base and doing it yourself. (I know a lot about the code-base but the 5 issues I am tracking were too much for me to debug alone. On top of that I don't know enough of the code base to make fixes to some of the bugs.)&lt;br /&gt;&lt;br /&gt;If you do more than 30K selects per second across all your servers, get peace of mind that someone will do their best to address any issues that you can't figure out. 
Get a MySQL support contract today.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7299630805059728971?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7299630805059728971/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7299630805059728971' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7299630805059728971'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7299630805059728971'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/mysql-support-awsome.html' title='MySQL support == AWSOME'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1770278078585734948</id><published>2008-01-14T10:52:00.000-08:00</published><updated>2008-01-14T14:31:32.575-08:00</updated><title type='text'>MAJOR Problems in mysql-5.0.51</title><content type='html'>mysql-5.0.51 causes huge blocking locks under high load. &lt;br /&gt;&lt;br /&gt;It also causes relay-log corruptions. For instance, how can a syntax error make it into replication, the reason is mysql-5.0.51 is truncating the stream.&lt;br /&gt;&lt;br /&gt;The only work around is to rebuild the relay log.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://bugs.mysql.com/bug.php?id=26489"&gt; The BUG &lt;/a&gt; is listed here for replication problems. 
I suspected IO_CACHE corruption as the cause.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;update: this is the cause of the huge blocks&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;InnoDB: Warning: a long semaphore wait:&lt;br /&gt;--Thread 1173899616 has waited at btr0cur.c line 424 for 292.00 seconds the semaphore:&lt;br /&gt;S-lock on RW-latch at 0x2df5159f58 created in file buf0buf.c line 497&lt;br /&gt;a writer (thread id 1173899616) has reserved it in mode  exclusive&lt;br /&gt;number of readers 0, waiters flag 1&lt;br /&gt;Last time read locked in file btr0cur.c line 424&lt;br /&gt;Last time write locked in file buf0buf.c line 1768&lt;br /&gt;InnoDB: Warning: a long semaphore wait:&lt;br /&gt;--Thread 1172302176 has waited at btr0cur.c line 424 for 295.00 seconds the semaphore:&lt;br /&gt;S-lock on RW-latch at 0x2df50706a8 created in file buf0buf.c line 497&lt;br /&gt;a writer (thread id 1172302176) has reserved it in mode  exclusive&lt;br /&gt;number of readers 0, waiters flag 1&lt;br /&gt;Last time read locked in file btr0cur.c line 424&lt;br /&gt;Last time write locked in file buf0buf.c line 1768&lt;br /&gt;InnoDB: Warning: a long semaphore wait:&lt;br /&gt;--Thread 1182951776 has waited at trx0trx.c line 1627 for 283.00 seconds the semaphore:&lt;br /&gt;Mutex at 0x2a9eac52b8 created file srv0srv.c line 872, lock var 0&lt;br /&gt;waiters flag 0&lt;br /&gt;InnoDB: ###### Starts InnoDB Monitor for 30 secs to print diagnostic info:&lt;br /&gt;InnoDB: Pending preads 1, pwrites 0&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Additionally mySQL is putting self in swap for a memory config that has not changed from my previous version.&lt;br /&gt;&lt;br /&gt;I'm thinking that there is some memory leak in 5.0.51&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1770278078585734948?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' 
type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1770278078585734948/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1770278078585734948' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1770278078585734948'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1770278078585734948'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/major-problems-in-mysql-5051.html' title='MAJOR Problems in mysql-5.0.51'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7723056169771287284</id><published>2008-01-14T10:46:00.000-08:00</published><updated>2008-01-14T14:52:42.476-08:00</updated><title type='text'>DO NOT USE O_DIRECT with EXT3</title><content type='html'>O_DIRECT under high load causes these issues&lt;br /&gt;&lt;br /&gt;This is a symptom&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;Losing some ticks... 
checking if CPU frequency changed.&lt;br /&gt;warning: many lost ticks.&lt;br /&gt;Your time source seems to be instable or some driver is hogging interrupts&lt;br /&gt;rip __do_softirq+0x4d/0xd0&lt;br /&gt;ttyS1: 1 input overrun(s)&lt;br /&gt;ttyS1: 1 input overrun(s)&lt;br /&gt;ttyS1: 1 input overrun(s)&lt;br /&gt;ttyS1: 1 input overrun(s)&lt;br /&gt;ttyS1: 1 input overrun(s)&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;h3&gt;BUT THE REAL PROBLEM is it Locks up the partition that the ibdata file is on.&lt;/h3&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Systems where the server locked up.&lt;br /&gt;&lt;br /&gt;&lt;b&gt; 2.6.9-34.ELsmp #1 SMP Fri Feb 24 16:56:28 EST 2006 x86_64 x86_64 x86_64 GNU/Linux &lt;/b&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;          total       used       free     shared    buffers     cached&lt;br /&gt;Mem:      16412760   16389976      22784          0      82440     686368&lt;br /&gt;-/+ buffers/cache:   15621168     791592&lt;br /&gt;Swap:      8393952        144    8393808&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;I had O_DIRECT running in production for over a month on some pretty loaded servers, but once I put it on some older servers, all hell broke loose.&lt;br /&gt;&lt;br /&gt;&lt;i&gt; Pretty Loaded is defined as &lt;/i&gt;&lt;br /&gt;&lt;br /&gt;1000 qps mainly selects mixed with large ranges at a high concurrency of 30 threads.&lt;br /&gt;&lt;br /&gt;CPU WIO is around 10-15% (acceptable thresholds)&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;If you insist on running O_DIRECT I recommend&lt;br /&gt;&lt;br /&gt;1. Test O_DIRECT on ever OS version in your farm&lt;br /&gt;2. 
Test O_DIRECT by producing so much load that it's unrealistic.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7723056169771287284?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7723056169771287284/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7723056169771287284' title='9 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7723056169771287284'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7723056169771287284'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/do-not-use-odirect-with-ext3.html' title='DO NOT USE O_DIRECT with EXT3'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>9</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1183908447183529575</id><published>2008-01-08T21:17:00.001-08:00</published><updated>2008-01-08T21:18:27.867-08:00</updated><title type='text'>What would you like me to write about?</title><content type='html'>&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1183908447183529575?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1183908447183529575/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1183908447183529575' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1183908447183529575'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1183908447183529575'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2008/01/what-would-you-like-me-to-write-about.html' title='What would you like me to write about?'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3791409046426686302</id><published>2007-12-13T15:53:00.000-08:00</published><updated>2007-12-18T16:14:34.999-08:00</updated><title type='text'>Flickr Stats how is it built</title><content type='html'>&lt;UL&gt;Flickr Stats how is it built.&lt;br /&gt;&lt;LI&gt;All Collection is done realtime&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;MYISAM and INNODB is used&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;The data is spread across 6 clusters (12 servers-6 used, 6 for fail over) mainly for data storage requirements&lt;br /&gt;&lt;/LI&gt;&lt;LI&gt;Memcache is not used at all in the core of the product.&lt;/LI&gt;&lt;br /&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In summary this was the longest project that I worked on, other then rebuilding the backend for Flickr when I first came on. The inner workings are very complex to achieve real-time collection-while not affecting page load times of a photo page. 
Most of my time was spent on creating a distributed lock once my DB design was solid.&lt;br /&gt;&lt;br /&gt;Things that would really make life easier:&lt;br /&gt;&lt;br /&gt;MYSQL AB gets rid of MYISAM and makes PBXT its replacement. I don't need all the great features of INNODB but I would like some. I'll go more into this later.&lt;br /&gt;&lt;br /&gt;Additionally INSERT DELAYED worked with ON DUPLICATE UPDATE. Currently it does not.&lt;br /&gt;&lt;br /&gt;Finally cross Engine Transactions would be cool but really not required.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3791409046426686302?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3791409046426686302/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3791409046426686302' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3791409046426686302'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3791409046426686302'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/12/flickr-stats-how-is-it-built.html' title='Flickr Stats how is it built'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-5593774979327982229</id><published>2007-12-11T13:41:00.000-08:00</published><updated>2008-01-02T20:46:55.044-08:00</updated><title 
type='text'>Distributed Locking Solving it with mySQL</title><content type='html'>At Flickr we use a Federated Architecture, where the WHOLE of the data is spread across many servers. This means that when you load a photo page the dynamic data generated is hitting a server that holds that data, while another photo page view may hit another server for content.&lt;br /&gt;&lt;br /&gt;Now to do complex operations to control the flow of what operates on data, the complex operation must lock the data. Let's call this a global lock, which is used to coordinate many jobs. Now to make these operations fast, let's run many processes across many servers. Essentially we needed to solve a hard problem in parallel computing: distributed locking.&lt;br /&gt;&lt;br /&gt;This took a bunch of dedicated time to get done properly. We solved the issue by using 3 database clusters.&lt;br /&gt;&lt;br /&gt;First I used the coolest mysql feature out there; GET_LOCK and IS_FREE_LOCK. These two mysql functions are the heart of the distributed lock. 
&lt;br /&gt;&lt;br /&gt;Why use them both?&lt;br /&gt;&lt;br /&gt;GET_LOCK will lock a string in MEMORY on mysql, and anything else trying to get a lock on that string will have to wait until TIMEOUT is reached.&lt;br /&gt;&lt;br /&gt;Since the TIMEOUT is in seconds, I don't want to wait and slow down everything, so we call IS_FREE_LOCK first which will return in a fraction of a ms&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;How do we use it?&lt;br /&gt;&lt;br /&gt;We lock an entire Account classification by setting the lock on the owner's Cluster.&lt;br /&gt;An account classification is defined as ACCOUNT_123_TASK - which is the string for the lock, this is hashed to the same SERVER where the account is located.&lt;br /&gt;&lt;br /&gt;Once the lock is established check another cluster to ensure that we have not performed the action already-this ensures that the event that entered the system doesn't corrupt data if the message to process made it into the system more than once.&lt;br /&gt;&lt;br /&gt;Finally operate on the data, if any of the operations fail ROLLBACK all changes, log and continue.&lt;br /&gt;&lt;br /&gt;The cool thing about this is that the locks and data are all in the same place. 
So, if the lock is not achievable then the data is unaffected.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-5593774979327982229?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/5593774979327982229/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=5593774979327982229' title='8 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5593774979327982229'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/5593774979327982229'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/12/distributed-locking-solving-it-with.html' title='Distributed Locking Solving it with mySQL'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>8</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-655691027660553282</id><published>2007-11-27T15:30:00.000-08:00</published><updated>2007-11-28T22:34:26.694-08:00</updated><title type='text'>When starting out building an application should I use Complex Joins?</title><content type='html'>I get questions on how to build mysql application from time to time, and I wanted to share the response to the question below.&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;&lt;br /&gt;In the beginning, should a developer try and stay away from complex JOINS? 
Is it better to use many smaller queries and cache the results (using something like memcached) rather than 1 larger query requiring multiple table JOINS?&lt;br /&gt;&lt;br /&gt;Needing to scale for millions of users is a great problem to have, but should I worry about it in the beginning? Whats the best approach to db design for day 1?&lt;br /&gt;&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Well IMHO build the application, build it to spec. When the scope changes (and this will always happen) flow with it. This is in no means a knock against Product-I actually look forward to this, because things are being discussed and if something needs to change lets do it! &lt;br /&gt;&lt;br /&gt;Now how to build the application to scale: Look at the scope of a page and gauge the frequency of access of that page. For instance on a page displaying 1 photo-very few if any ranges and/or group-by and/or order-by and/or joins would be used to generate this page since a photo page needs to generate fast or loose the interest of the page viewer. On the other hand a page displaying the accounts holder account information thats accessed at a order of magnitude less rate-this page can have tougher queries. (Note: watch out for contention)&lt;br /&gt;&lt;br /&gt;Once the feature set has been decided upon; take the application apart and find the bottle necks. Highlight the issues that will not scale. &lt;br /&gt;&lt;br /&gt;This can be figured out by filling your dev-database with bogus information and running mysqlslap of common select queries to test stability, response time, speed etc.&lt;br /&gt;&lt;br /&gt;Take into account how scale can be achieved before changing code.&lt;br /&gt; &lt;br /&gt;Such as is the hardware ok to run all my services? &lt;br /&gt;Can I split some services out like move WWW specific traffic to a separate box from the database? (You will be surprised how many people combine the servers)&lt;br /&gt;Am I allocating mysql resources properly? 
If not how can I tell?&lt;br /&gt;What is my system reporting as the lack of resources? Am I running out of CPU or IO?&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The real performance gains come from laying out the data properly and reducing the amounts of sorts, ranges, joins needed to generate your dynamic pages.&lt;br /&gt;&lt;br /&gt;In some cases I've had to add tables where data in said tables represent start points to range in other tables. Then on top of that spread that core-logic across MANY individual machines just to get a page to load in less then 300 ms.&lt;br /&gt;&lt;br /&gt; &lt;br /&gt;Finally add Nagios to alert you when things go wrong such as too many procs running on this database server and Ganglia to chart system resources over time.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;UL&gt;So, to answer the question should I use complex joins at 1st? &lt;br /&gt;&lt;LI&gt; Sure, but here is a rule of thumb-the harder the query is the less it can be used.&lt;br /&gt;&lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; If traffic ramps up, look to see what the hardest queries are and their frequency of use. Try to reduce these execution times by spreading the data out to more tables or more servers, only if benchmarks show that this is the way to go.&lt;br /&gt;&lt;/LI&gt;&lt;br /&gt;&lt;br /&gt;&lt;LI&gt; If traffic really ramps up because the application is the next best thing since Flickr, then look at the larger picture of what should be done to scale. 
&lt;br /&gt;&lt;/LI&gt;&lt;br /&gt;&lt;/UL&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-655691027660553282?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/655691027660553282/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=655691027660553282' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/655691027660553282'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/655691027660553282'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/11/when-starting-out-building-application.html' title='When starting out building an application should I use Complex Joins?'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-4745755115474423618</id><published>2007-10-10T14:33:00.000-07:00</published><updated>2007-10-10T15:48:52.826-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='internals'/><category scheme='http://www.blogger.com/atom/ns#' term='filesize'/><category scheme='http://www.blogger.com/atom/ns#' term='INNODB'/><title type='text'>INNODB file_per_table growth based on Data Added</title><content type='html'>How does INNODB in mysql 5.0 tablespace per file grow?&lt;br /&gt;&lt;br /&gt;I can just add data and hope to catch when the table space grows, but I really want to 
understand the internals of INNODB and build an accurate modal on how big the table space will grow when X rows are added.&lt;br /&gt;&lt;br /&gt;&lt;UL&gt;Some things that need to be known. &lt;br /&gt;&lt;LI&gt;Data stored in INNODB is stored as pages with a default size of 16KB.&lt;br /&gt;&lt;LI&gt;INNODB adds a page for every secondary INDEX&lt;/LI&gt;&lt;br /&gt;&lt;LI&gt;INNODB has some overhead of about 18 bytes (maybe more) per row &lt;/LI&gt;&lt;br /&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;First let's set up a test environment&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;delimiter //&lt;br /&gt;&lt;br /&gt;CREATE PROCEDURE resettest()&lt;br /&gt;&lt;br /&gt; begin  &lt;br /&gt;        DROP TABLE IF EXISTS testvarchar;&lt;br /&gt;        CREATE TABLE `testvarchar` (&lt;br /&gt;                        `id` int(10) unsigned NOT NULL,&lt;br /&gt;                        `data_key` varchar(255) default NULL,&lt;br /&gt;                        `data_nokey` varchar(255) default NULL,&lt;br /&gt;                        PRIMARY KEY  (`id`),&lt;br /&gt;                        KEY `data_key` (`data_key`)&lt;br /&gt;        ) ENGINE=InnoDB DEFAULT CHARSET=latin1;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt; end;&lt;br /&gt;//&lt;br /&gt;&lt;br /&gt;delimiter ;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now set up some inserts to populate the table&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;delimiter //&lt;br /&gt;&lt;br /&gt;CREATE PROCEDURE populate(IN param1 INT)&lt;br /&gt;&lt;br /&gt; begin  &lt;br /&gt;&lt;br /&gt;   declare i int; &lt;br /&gt;&lt;br /&gt;   set i = 0;&lt;br /&gt;&lt;br /&gt;   start transaction;&lt;br /&gt;&lt;br /&gt;   while i &lt; param1 do&lt;br /&gt;&lt;br /&gt;     insert into testvarchar (id, data_key, data_nokey)&lt;br /&gt;&lt;br /&gt;            values (i, repeat("-", 1), repeat("-", 250));&lt;br /&gt;&lt;br /&gt;     set i = i + 1; &lt;br /&gt;&lt;br /&gt;   end while;&lt;br /&gt;&lt;br /&gt;   commit;&lt;br /&gt;&lt;br /&gt; end;&lt;br 
/&gt;//&lt;br /&gt;&lt;br /&gt;delimiter ;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Now from this I know that the size of each record is&lt;br /&gt;id - 4 bytes&lt;br /&gt;data_key - 2 bytes (varchar there is a pad of a single byte)&lt;br /&gt;data_nokey - 251 bytes (ditto)&lt;br /&gt;TOTAL: 257 bytes&lt;br /&gt;&lt;br /&gt;Since there data_key AND it's not the PRIMARY INDEX-a page is allocated just for it. So let's sum up the data.&lt;br /&gt;&lt;br /&gt;id - 4 bytes //PK&lt;br /&gt;data_key - 2 bytes //Secondary Index&lt;br /&gt;Total: 6 bytes&lt;br /&gt;&lt;br /&gt;for a total 257 bytes for the table plus 6 bytes for the index for a total of 263 bytes.&lt;br /&gt;&lt;br /&gt;Now there is some overhead on INNODB tables, there is a Record Header which is 5 bytes and the secondary index references the PRIMARY KEY (the clustered index), Plus a transaction-id which is 6 bytes and a roll pointer which is 7 bytes for the undo log&lt;br /&gt;&lt;br /&gt;So the record size is 263 (INDEX &amp;&amp; DATA) + INNODB OVERHEAD (17) = 280 bytes.&lt;br /&gt;&lt;br /&gt;Now since the INDEX takes a page and data takes a page I need to take&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;32768/280 == NUMBER OF RECORDS to make the TABLE SPACE GROW 32 KB&lt;br /&gt;This == 117 rows;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;B&gt; The start of the datafile is 112K which is 96K + Page for INDEX at create time&lt;/B&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt; ls -lrh&lt;br /&gt;total 132K&lt;br /&gt;-rw-rw----  1 mysql mysql 112K Oct 10 22:00 testvarchar.ibd&lt;br /&gt;-rw-rw----  1 mysql mysql 8.5K Oct 10 22:00 testvarchar.frm&lt;br /&gt;-rw-rw----  1 mysql mysql   65 Oct 10 00:16 db.opt&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Adding 117 rows&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;call resettest();&lt;br /&gt;call populate(117);&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt; ls -lrh&lt;br /&gt;total 164K&lt;br /&gt;-rw-rw----  
1 mysql mysql 144K Oct 10 22:16 testvarchar.ibd&lt;br /&gt;-rw-rw----  1 mysql mysql 8.5K Oct 10 22:16 testvarchar.frm&lt;br /&gt;-rw-rw----  1 mysql mysql   65 Oct 10 00:16 db.opt&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;call resettest();&lt;br /&gt;call populate(234); // 64K / 280 =~ 234&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;//GROWS another 32K&lt;br /&gt;ls -lrh&lt;br /&gt;total 196K&lt;br /&gt;-rw-rw----  1 mysql mysql 176K Oct 10 22:17 testvarchar.ibd&lt;br /&gt;-rw-rw----  1 mysql mysql 8.5K Oct 10 22:17 testvarchar.frm&lt;br /&gt;-rw-rw----  1 mysql mysql   65 Oct 10 00:16 db.opt&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Although this may all look right, in fact it's not. This may be a good approximation, but I am not taking into account the additional overhead added by INNODB to force the datafile to grow prior to this calculation at a much earlier row count. I have not tracked down what that is yet. It may be that innodb needs to reserve space based on the 2xheight of the binary tree to make sure there is enough space to undo leaf splitting. I'm sure that INNODB doesn't allocate the full 255 bytes for varchar in indexes or does it? 
Maybe you can shed some light?&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-4745755115474423618?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/4745755115474423618/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=4745755115474423618' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4745755115474423618'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/4745755115474423618'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/10/innodb-filepertable-growth-based-on.html' title='INNODB file_per_table growth based on Data Added'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8125558144530050101</id><published>2007-09-26T16:53:00.000-07:00</published><updated>2007-09-26T17:49:15.852-07:00</updated><title type='text'>Developing a new application</title><content type='html'>So, I'm developing a new application that's in the capacity planning phase. The application is designed to scale linearly and scale is very easy to do just by adding another database server. But, at what point do you upgrade? What point in the applications life do you add new servers to serve the expectation of the users?&lt;br /&gt;&lt;br /&gt;To figure out these questions, thresholds need to be defined. 
When these thresholds are exceeded then upgrades need to occur.&lt;br /&gt;&lt;br /&gt;Here is a process (my process) of questions to ask to define thresholds: First I start out with generic questions listed below.&lt;br /&gt;&lt;br /&gt;What is the expected amount of users who are going to use the product initially?&lt;br /&gt;&lt;br /&gt;What is the expected usage pattern? For instance if they are adding data what is the add rate? If they are reading data what is the read rate?&lt;br /&gt;&lt;br /&gt;Given that the first two questions are answered now ask when is it going to break. If you can answer this you're golden. This is the hard part. How do you know when it's going to break and under what conditions without putting the service live. The best thing to do IMHO is to benchmark the system under typical usage patterns then double that.&lt;br /&gt;&lt;br /&gt;In my case, I know for sure that it will work on launch and for months thereafter but I don't know when it's going to fail. Since getting hardware here is an involved process I need to know ahead of time when things will fail-it makes us all honest.&lt;br /&gt;&lt;br /&gt;To get an idea on when it will break, I'm gauging the passive additions that are in production now to get a baseline on how much data grows from day to day. In my case the dataset grows 10GB per day spread across 5 servers, thus 2GB per day from a single point of server view. Now I have a base line. In how many days will the application fail to perform under thresholds previously set? The thresholds set, are defined as-all data retrieval and addition must not take longer than 300ms for all components involved. 
So, when the passive additions on average take 20% of 300ms then I know the application is about to hit my own personal saturation point, thus I must ask for new equipment.&lt;br /&gt;&lt;br /&gt;But, the problem is not as clear cut, really I need to answer the question does InnoDB performance degrade at O(nlogn) when adding strings to it? If not what is the degradation of string addition and retrieval when the dataset is HUGE like @40-200+ GB&lt;br /&gt;&lt;br /&gt;(Where did I get log(n) from? Well, indexes in INNODB are B-Tree's-I/O performance should degrade at nlogn as data grows.) &lt;br /&gt;&lt;br /&gt;Some good reading mySQL &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/insert-speed.html"&gt;Insert-Speed&lt;/a&gt;&lt;br /&gt;&lt;DD/&gt; This is a little old but it's a good approximation.&lt;br /&gt;&lt;br /&gt;Things of note:&lt;br /&gt;&lt;br /&gt;innodb_buffer_pool_size=12000M&lt;br /&gt;innodb_additional_mem_pool_size=20M&lt;br /&gt;innodb_log_file_size=512M&lt;br /&gt;innodb_log_buffer_size=16M&lt;br /&gt;innodb_flush_log_at_trx_commit=2&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I must watch that the merged records count stays within a few hundred thousand of the inserted record count in the insert buffer and adaptive hash index part of SHOW ENGINE INNODB STATUS IF it doesn't then INNODB is hitting it's own limitation.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8125558144530050101?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8125558144530050101/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8125558144530050101' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8125558144530050101'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8125558144530050101'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/09/developing-new-application.html' title='Developing a new application'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-6826544228544701085</id><published>2007-09-14T14:34:00.000-07:00</published><updated>2007-09-14T15:14:04.350-07:00</updated><title type='text'>The mySQL Optimizer and your applications</title><content type='html'>What is the mySQL Optimizer? Well it's the part of the mysql query engine that determines what index to use, based on given sql. It sucks in every version that I tested, which is&lt;br /&gt;&lt;br /&gt;3.23&lt;br /&gt;4.0&lt;br /&gt;4.1&lt;br /&gt;5.0&lt;br /&gt;5.1&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I find that in nearly every app that I've end up writing I need to create my own optimizer to determine what index to use.&lt;br /&gt;&lt;br /&gt;Why?&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Well, imagine this. Your table has 30 indexes and many of the prefixes of compound indexes are the same. The reason is because given a question you want to sort the data quickly in different views. The optimizer sucks at figuring out which index to use under these cases (and many others)&lt;br /&gt;&lt;br /&gt;For example:&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;SELECT * FROM SomeTable WHERE owner_id = ? 
AND perms IN (0,1) AND karma IN (0,1) ORDER by date_create DESC, photo_id DESC limit 10&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;Now the table contains an index on&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;owner_id, perms, karma, date_create,photo_id&lt;br /&gt;owner_id, date_create, photo_id&lt;br /&gt;and 28 other index.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Which index should mysql pick?&lt;br /&gt;&lt;br /&gt;Well since perms is a range and karma is a range then using the index (owner_id, perms, karma, date_create, photo_id) to avoid a filesort from an ORDER BY is not possible. The reason to avoid filesorts  especially in cases of large amounts of data, is because a filesort is 60% of the query time. So, the course of action is to pick the index where a filesort is avoided, thus&lt;br /&gt;&lt;br /&gt;owner_id, date_create, photo_id should be picked.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;But in fact mysql doesn't pick the correct index, ever. Innodb especially from query to query execution of the same type query the index changes over and over due to how innodb determines cardinality.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now, you may be thinking why not just run analyze table? Still mysql will pick the wrong index, and Analyze table for INNODB only dumps the innodb_buffer_pool which sucks.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;How does one get around this?&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Write your own optimizer! 
Here is some PHP snippits&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt;function sometable_get_index($where, $sort) {&lt;br /&gt;     if(strpos($where, 'owner_id') !== FALSE) {&lt;br /&gt;                        if (strpos($where, 'karma') !== FALSE) {&lt;br /&gt;                                if (strpos($sort,'date_create') !== FALSE &amp;&amp;&lt;br /&gt;                                        strpos($where, 'IN') !== FALSE) {&lt;br /&gt;&lt;br /&gt;                                        return 'USE INDEX (owner_datecreate_id)';&lt;br /&gt;                                }&lt;br /&gt;&lt;br /&gt;                                if(strpos($where, 'perms') !== FALSE &amp;&amp; !$sort) {&lt;br /&gt;&lt;br /&gt;                                        return 'USE INDEX (owner_perms_karma)';&lt;br /&gt;                                }&lt;br /&gt;&lt;br /&gt;                         }&lt;br /&gt;&lt;br /&gt;                        MORE Parser logic....&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;}&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Why use strpos? 
Well, it's faster then preg_match and strstr and I only care about the existence of SQL fields in the where and sort clause, not the contents of the rvalue of each column.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-6826544228544701085?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/6826544228544701085/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=6826544228544701085' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6826544228544701085'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/6826544228544701085'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/09/mysql-optimizer-and-your-applications.html' title='The mySQL Optimizer and your applications'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-1315255235405449817</id><published>2007-08-23T16:34:00.001-07:00</published><updated>2007-08-23T16:45:08.198-07:00</updated><title type='text'>InnoDB: Number of pending reads 128, pending pread calls 0</title><content type='html'>&lt;blockquote&gt;InnoDB: Number of pending reads 128, pending pread calls 0&lt;br /&gt;InnoDB: Error: InnoDB has waited for 50 seconds for pending&lt;br /&gt;InnoDB: reads to the buffer pool to be finished.&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;What does 
this mean? Well searching yahoo search I couldn't find anything other then pointers to source code where this message generated. I assumed it was a disk problem because performance was super crappy.&lt;br /&gt;&lt;br /&gt;My disk monitoring software which probes MegaRaidCli didn't report a failed disk. Linux didn't report a failed disk or any errors. So, what is happening? Was I wrong?&lt;br /&gt;&lt;br /&gt;Well, in fact it was a failed disk, but not in a state that could be classified as failed by the controller or any other component including the OS. With the help of my bud we found that a disk in the array was in a critical state-the disk itself just had a bunch of sector errors, yet the controller, os, everything would not mark it as dead - so the end result was to remove the drive and swap it with another.&lt;br /&gt;&lt;br /&gt;So in essence this message means something is wrong with your disks and or filesystem.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-1315255235405449817?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/1315255235405449817/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=1315255235405449817' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1315255235405449817'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/1315255235405449817'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/08/innodb-number-of-pending-reads-128.html' title='InnoDB: Number of pending reads 128, pending pread calls 0'/><author><name>Dathan 
Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-2793334095467283252</id><published>2007-08-17T19:11:00.000-07:00</published><updated>2007-08-17T19:14:24.298-07:00</updated><title type='text'>Oooh really bad bug in 4.1.23, 4.1.24b INNODB only</title><content type='html'>&lt;a href="http://bugs.mysql.com/bug.php?id=30485"&gt;Bug: 30485&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;pre&gt;[miguel@skybr 4.1]$ bin/mysql -uroot db77&lt;br /&gt;Welcome to the MySQL monitor.  Commands end with ; or \g.&lt;br /&gt;Your MySQL connection id is 2 to server version: 4.1.24-debug&lt;br /&gt;&lt;br /&gt;Type 'help;' or '\h' for help. Type '\c' to clear the buffer.&lt;br /&gt;&lt;br /&gt;mysql&gt; CREATE TABLE `GiftCodes` (&lt;br /&gt;    -&gt;   `code` varchar(32) collate utf8_bin NOT NULL default '',&lt;br /&gt;    -&gt;   `used_by_id` bigint(10) unsigned NOT NULL default '0',&lt;br /&gt;    -&gt;   PRIMARY KEY  (`code`)&lt;br /&gt;    -&gt; ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;&lt;br /&gt;Query OK, 0 rows affected (0.00 sec)&lt;br /&gt;&lt;br /&gt;mysql&gt; &lt;br /&gt;mysql&gt; insert into GiftCodes values ('foo',7);&lt;br /&gt;Query OK, 1 row affected (0.00 sec)&lt;br /&gt;&lt;br /&gt;mysql&gt; select * from GiftCodes where code='foo';&lt;br /&gt;+------+------------+&lt;br /&gt;| code | used_by_id |&lt;br /&gt;+------+------------+&lt;br /&gt;| foo  |          7 |&lt;br /&gt;+------+------------+&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;br /&gt;mysql&gt; update GiftCodes set used_by_id='1' where code='foo';&lt;br /&gt;Query OK, 0 rows affected (0.00 sec)&lt;br /&gt;Rows matched: 0  Changed: 0  
Warnings: 0&lt;br /&gt;&lt;br /&gt;mysql&gt; select * from GiftCodes where code='foo';&lt;br /&gt;+------+------------+&lt;br /&gt;| code | used_by_id |&lt;br /&gt;+------+------------+&lt;br /&gt;| foo  |          7 |&lt;br /&gt;+------+------------+&lt;br /&gt;1 row in set (0.00 sec)&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;/blockquote&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-2793334095467283252?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/2793334095467283252/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=2793334095467283252' title='14 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2793334095467283252'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/2793334095467283252'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/08/oooh-really-bad-bug-in-4123-4124b.html' title='Oooh really bad bug in 4.1.23, 4.1.24b INNODB only'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>14</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-7047883043377853395</id><published>2007-07-20T14:00:00.000-07:00</published><updated>2007-07-20T15:02:46.618-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='MASTER_POS_WAIT'/><category scheme='http://www.blogger.com/atom/ns#' term='master dual replication'/><category 
scheme='http://www.blogger.com/atom/ns#' term='slave'/><category scheme='http://www.blogger.com/atom/ns#' term='replication'/><title type='text'>Replication Syncing MASTER_POS_WAIT</title><content type='html'>&lt;LI&gt;MASTER_POS_WAIT&lt;/LI&gt;&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt; This function is useful for control of master/slave synchronization. It blocks until the slave has read and applied all updates up to the specified position in the master log. The return value is the number of log events the slave had to wait for to advance to the specified position. The function returns NULL if the slave SQL thread is not started, the slave's master information is not initialized, the arguments are incorrect, or an error occurs. It returns -1 if the timeout has been exceeded. If the slave SQL thread stops while MASTER_POS_WAIT() is waiting, the function returns NULL. If the slave is past the specified position, the function returns immediately.&lt;br /&gt;&lt;br /&gt;If a timeout value is specified, MASTER_POS_WAIT() stops waiting when timeout seconds have elapsed. timeout must be greater than 0; a zero or negative timeout means no timeout.&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;This is great for applications which auto promote masters, but one has to account for race conditions.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;For example, dbmaster1 and dbmaster2 are in bi-directional replication displayed below.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;dbmaster1 &lt;----&gt; dbmaster2&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;dbmaster2 has slaves called dbslave1 and dbslave2. 
We want to replace dbmaster2 with dbslave1.&lt;br /&gt;&lt;br /&gt;On dbmaster1 issue:&lt;br /&gt;&lt;br /&gt;FLUSH LOGS;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;On dbmaster2 stop external writes to it and execute&lt;br /&gt;&lt;br /&gt;SELECT MASTER_POS_WAIT('dbmaster1-bin.000002', 4); SLAVE STOP;&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;db3&gt;show slave status\G&lt;br /&gt;*************************** 1. row ***************************&lt;br /&gt;             Slave_IO_State: &lt;br /&gt;                Master_Host: dbmaster1&lt;br /&gt;                Master_User: replication&lt;br /&gt;                Master_Port: 3306&lt;br /&gt;              Connect_Retry: 60&lt;br /&gt;            Master_Log_File: dbmaster1-bin.000002&lt;br /&gt;        Read_Master_Log_Pos: 3696603&lt;br /&gt;             Relay_Log_File: dbslave-relay.000005&lt;br /&gt;              Relay_Log_Pos: 597702176&lt;br /&gt;      Relay_Master_Log_File: dbmaster1-bin.000002&lt;br /&gt;           Slave_IO_Running: No&lt;br /&gt;          Slave_SQL_Running: No&lt;br /&gt;            Replicate_Do_DB: &lt;br /&gt;        Replicate_Ignore_DB: &lt;br /&gt;         Replicate_Do_Table: &lt;br /&gt;     Replicate_Ignore_Table: &lt;br /&gt;    Replicate_Wild_Do_Table: &lt;br /&gt;Replicate_Wild_Ignore_Table: &lt;br /&gt;                 Last_Errno: 0&lt;br /&gt;                 Last_Error: &lt;br /&gt;               Skip_Counter: 0&lt;br /&gt;        &lt;b&gt;Exec_Master_Log_Pos: 805&lt;/b&gt;&lt;br /&gt;            Relay_Log_Space: 601376092&lt;br /&gt;            Until_Condition: None&lt;br /&gt;             Until_Log_File: &lt;br /&gt;              Until_Log_Pos: 0&lt;br /&gt;         Master_SSL_Allowed: No&lt;br /&gt;         Master_SSL_CA_File: &lt;br /&gt;         Master_SSL_CA_Path: &lt;br /&gt;            Master_SSL_Cert: &lt;br /&gt;          Master_SSL_Cipher: &lt;br /&gt;             Master_SSL_Key: &lt;br /&gt;      Seconds_Behind_Master: NULL&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br 
/&gt;&lt;br /&gt;Notice Exec_Master_Log_Pos is at POS 805. This is a small window of events that passed. So, make sure to account for it.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now on dbmaster2 issue:&lt;br /&gt;&lt;br /&gt;FLUSH LOGS&lt;br /&gt;&lt;br /&gt;Do the same steps for dbslave1-2 with MASTER_POS_WAIT but for the position on dbmaster2 not dbmaster1.&lt;br /&gt;&lt;br /&gt;Make dbslave1 a master by adding&lt;br /&gt;&lt;br /&gt;log-bin to my.cnf&lt;br /&gt;&lt;br /&gt;Issue CHANGE MASTER TO on dbslave1 to dbmaster1 // this moves dbslave1 to dbmaster1&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;CHANGE MASTER TO MASTER_HOST='dbmaster1', MASTER_LOG_POS=805, MASTER_LOG_FILE='dbmaster1.000002';&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;Issue CHANGE MASTER TO on dbslave2 to dbslave1 // this moves dbslave2 to dbslave1&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;CHANGE MASTER TO MASTER_HOST='dbslave1', MASTER_LOG_POS=4, MASTER_LOG_FILE='dbslave1-bin.000001';&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Enable writes on dbslave1&lt;br /&gt;&lt;br /&gt;done.&lt;br /&gt;&lt;br /&gt;I wrote an application that does this all for me, using IPC, fork, db connections and the "algorithm" above. If you're interested in it, 
I'll post it here.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-7047883043377853395?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/7047883043377853395/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=7047883043377853395' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7047883043377853395'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/7047883043377853395'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/07/replication-syncing-masterposwait.html' title='Replication Syncing MASTER_POS_WAIT'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-506948249963394116</id><published>2007-06-25T16:16:00.000-07:00</published><updated>2007-06-25T18:11:04.266-07:00</updated><title type='text'>INNODB and strings</title><content type='html'>&lt;pre&gt;&lt;br /&gt;mysql&gt; CREATE TABLE innodb_string_test (h varchar(512) ) TYPE=INNODB;&lt;br /&gt;Query OK, 0 rows affected, 2 warnings (0.55 sec)&lt;br /&gt;&lt;br /&gt;mysql&gt; show warnings;&lt;br /&gt;+---------+------+--------------------------------------------------------------------------+&lt;br /&gt;| Level   | Code | Message                                                                  |&lt;br 
/&gt;+---------+------+--------------------------------------------------------------------------+&lt;br /&gt;| Warning | 1246 | Converting column 'h' from CHAR to TEXT                                  |&lt;br /&gt;| Warning | 1287 | 'TYPE=storage_engine' is deprecated; use 'ENGINE=storage_engine' instead |&lt;br /&gt;+---------+------+--------------------------------------------------------------------------+&lt;br /&gt;2 rows in set (0.00 sec)&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Technically innodb supports defining varchars greater than 255 characters (utf8 are double bytes so don't assume that 255 characters mean bytes-FYI). &lt;br /&gt;&lt;br /&gt;MySQL the server doesn't allow this.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;INNODB as of mysql-5.0.3 stores strings in COMPACT format. This increases CPU usage slightly but saves nearly double the diskspace, thus giving an application twice the memory. (Smaller data, more of it fits in memory).&lt;br /&gt;&lt;br /&gt;To turn this feature off, define table ROW_FORMAT=REDUNDANT.&lt;br /&gt;&lt;br /&gt;For more details on the Physical structure of strings in INNODB go &lt;a href="http://dev.mysql.com/doc/refman/5.0/en/innodb-physical-record.html"&gt; here&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In summary: use varchar over char.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;[edited for correctness - thanks Ken Jacobs]&lt;br /&gt;&lt;br /&gt;Looking at this slide from INNOBASE.com - in 5.1 they will start using a COMPRESSED format based off of zlib. 
Here are some numbers and details:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://innodb.com/wp/wp-content/uploads/2007/05/innodb-architecture-locking-mysql-uc-2007.pdf" alt="pdf presentation"&gt;mysqluc2007&lt;/a&gt;&lt;br /&gt;&lt;table border=1&gt;&lt;br /&gt;&lt;tr&gt;&lt;br /&gt;&lt;thead&gt;&lt;br /&gt;&lt;th&gt;&amp;nbsp;&lt;/th&gt;&lt;th&gt;Uncompressed&lt;/th&gt;&lt;th&gt;Compressed&lt;/th&gt;&lt;br /&gt;&lt;/thead&gt;&lt;br /&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tbody&gt;&lt;br /&gt;&lt;tr&gt;&lt;td&gt;File Size &lt;/td&gt;&lt;td&gt;2.8GB&lt;/td&gt;&lt;td&gt;1.4 GB&lt;/td&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;td&gt;Insert/sec&lt;/td&gt;&lt;td&gt; 1300&lt;/td&gt;&lt;td&gt;1000&lt;/td&gt;&lt;/tr&gt;&lt;br /&gt;&lt;tr&gt;&lt;td&gt;CPU Usage&lt;/td&gt;&lt;td&gt; 5%-50%&lt;/td&gt;&lt;td&gt;15%-50%&lt;/td&gt;&lt;/tr&gt;&lt;br /&gt;&lt;/tbody&gt;&lt;br /&gt;&lt;/table&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-506948249963394116?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/506948249963394116/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=506948249963394116' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/506948249963394116'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/506948249963394116'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/06/innodb-and-strings.html' title='INNODB and strings'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-3266489619517197035</id><published>2007-06-12T17:18:00.001-07:00</published><updated>2007-06-12T17:28:48.591-07:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='sql'/><category scheme='http://www.blogger.com/atom/ns#' term='fancy'/><category scheme='http://www.blogger.com/atom/ns#' term='duplicate'/><category scheme='http://www.blogger.com/atom/ns#' term='mysql'/><category scheme='http://www.blogger.com/atom/ns#' term='insert'/><title type='text'>Unique strings in a text field</title><content type='html'>So, I want to reduce data usage of a text field, by storing unique strings separated by a delimiter. So to do so I came up with this:&lt;br /&gt;&lt;br /&gt;Given a table&lt;br /&gt;&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;CREATE TABLE `hmm` (&lt;br /&gt;  `a` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  `b` text NOT NULL,&lt;br /&gt;  `c` int(10) unsigned NOT NULL default '0',&lt;br /&gt;  PRIMARY KEY  (`a`)&lt;br /&gt;) ENGINE=InnoDB DEFAULT CHARSET=latin1&lt;br /&gt;&lt;br /&gt;INSERT INTO hmm VALUES (1, 'hmm8',1) ON DUPLICATE KEY UPDATE b = IF(FIND_IN_SET('hmm8', b)&gt;0, b, CONCAT_WS(',',b, 'hmm8')), c = c+1;&lt;br /&gt;&lt;br /&gt;SELECT * FROM hmm;&lt;br /&gt;+---+--------------------------+---+&lt;br /&gt;| a | b                        | c |&lt;br /&gt;+---+--------------------------+---+&lt;br /&gt;| 1 | hmm4,hmm5,hmm6,hmm7,hmm8 | 7 |&lt;br /&gt;+---+--------------------------+---+&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Let's break apart the INSERT statement, if column 'a' produces a duplicate &lt;i&gt;ON DUPLICATE KEY UPDATE&lt;/i&gt; will issue an IF statement&lt;br /&gt;&lt;br /&gt;FIND_IN_SET will return the position of the found string. The logic of IF says: if FIND_IN_SET is greater than 0, i.e. 
the string was found, update column b with b itself (do nothing) else concat the column with the column contents and the new string separated by a comma. Additionally in all cases increment column c by 1.&lt;br /&gt;&lt;br /&gt;Why do it this way? Well I removed the need to do a SELECT and provided a solution to store distinct text values instead of the raw value, thus saving space and resources on network transfers etc. &lt;br /&gt;&lt;br /&gt;A little quick statement that might be useful for others.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-3266489619517197035?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/3266489619517197035/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=3266489619517197035' title='5 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3266489619517197035'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/3266489619517197035'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/06/unique-strings-in-text-field.html' title='Unique strings in a text field'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>5</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-364652700727680835</id><published>2007-06-11T14:21:00.000-07:00</published><updated>2007-06-11T15:35:04.125-07:00</updated><title type='text'>Multi-Master Replication, looking 
over the code-base</title><content type='html'>I'm running into a situation, where I need real multi-master replication, yet as we all know this doesn't exist (yet) in mysql.&lt;br /&gt;&lt;br /&gt;So, as a result I'm investigating how I could implement multi-master replication. Fortunately mySQL has documented some of the source code and hosts this documentation online.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://dev.mysql.com/sources/doxygen/mysql-5.1/slave_8cc-source.html"&gt;http://dev.mysql.com/sources/doxygen/mysql-5.1/slave_8cc-source.html&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;There are already references and hooks for multi-master replication in slave.cc&lt;br /&gt;&lt;pre&gt;&lt;br /&gt;&lt;br /&gt; /*&lt;br /&gt;      TODO: replace the line below with&lt;br /&gt;      list_walk(&amp;master_list, (list_walk_action)end_slave_on_walk,0);&lt;br /&gt;      once multi-master code is ready.&lt;br /&gt; */&lt;br /&gt;&lt;br /&gt;&lt;/pre&gt;&lt;br /&gt;&lt;br /&gt;Of all the ideas that I have batted around, I've come to a common conclusion, modify mysql source to enable multi-master replication or make an external daemon that would essentially do the same thing.&lt;br /&gt;&lt;br /&gt;&lt;UL&gt;&lt;br /&gt;&lt;br /&gt;In both cases here are my requirements.&lt;br /&gt; &lt;br /&gt;&lt;LI&gt; It would need to read multiple replication events &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; It would need to log these multiple replication events &lt;/LI&gt;&lt;br /&gt;&lt;LI&gt; It would need a command interface to dynamically add,remove,change hosts &lt;/LI&gt;&lt;br /&gt;&lt;br /&gt;&lt;/UL&gt;&lt;br /&gt;&lt;br /&gt;I could either build a method for pushing replication events to a list of hosts, or build a subscribe method which I'm leaning towards.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-364652700727680835?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' 
type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/364652700727680835/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=364652700727680835' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/364652700727680835'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/364652700727680835'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/06/multi-master-replication-looking-over.html' title='Multi-Master Replication, looking over the code-base'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-31421954.post-8861843096036391306</id><published>2007-06-02T15:57:00.000-07:00</published><updated>2007-06-02T16:05:54.387-07:00</updated><title type='text'>INNODB Disk setup and mount options</title><content type='html'>Assumptions:&lt;br /&gt;&lt;br /&gt;RHEL&lt;br /&gt;x86_64&lt;br /&gt;EXT3&lt;br /&gt;RAID&lt;br /&gt;&lt;br /&gt;What Raid to use?&lt;br /&gt;RAID-10&lt;br /&gt;&lt;br /&gt;Why?&lt;br /&gt;It's faster. RAID-5 offers more disk space but the parity bit messes things up, unless you have some uber hardware-raid card that caches that operation. Personally I am not a  fan.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Stripe Size:&lt;br /&gt;128K - this is really good for INNODB, you'll see a huge boost in responsiveness by making your Stripe Size 128K. 
I had a 64K stripe size, and I was blown away by the improvement of 128K&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Mount options:&lt;br /&gt;&lt;br /&gt;mkfs.ext3 -T largefile | mkfs.ext3 -T largefile4&lt;br /&gt;&lt;br /&gt;Unless you're going to have millions of files, this is a good option.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Make sure /etc/fstab mounts the mysql partition or the data that mysql resides on with noatime.&lt;br /&gt;&lt;br /&gt;atime is accesstime: this is a huge boost in performance, tracking each time your ibdata file was last accessed is worthless, so don't do it.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;From other benchmarks that I ran, I think that IBM's open source File system may be better than ext3 for certain workloads. Once I have more information I will post it here.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/31421954-8861843096036391306?l=mysqldba.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://mysqldba.blogspot.com/feeds/8861843096036391306/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://www.blogger.com/comment.g?blogID=31421954&amp;postID=8861843096036391306' title='4 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8861843096036391306'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/31421954/posts/default/8861843096036391306'/><link rel='alternate' type='text/html' href='http://mysqldba.blogspot.com/2007/06/innodb-disk-setup-and-mount-options.html' title='INNODB Disk setup and mount options'/><author><name>Dathan Pattishall</name><uri>https://profiles.google.com/113910139807841637853</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh4.googleusercontent.com/-1obZojJYwaU/AAAAAAAAAAI/AAAAAAAAAD0/a4sMfVpQybQ/s512-c/photo.jpg'/></author><thr:total>4</thr:total></entry></feed>
